##### 1: Column Information

In [2]:
import pandas as pd
parks = pd.read_csv('nationalpark_visitors.csv')
parks.head()

Unnamed: 0,index,Park,Location,AnnualPassPrice,Area,ParkType,Year2021,Year2020,Year2019
0,1,Great Smoky Mountains,"Gatlinburg, TN",40,816.3 sqmi,National park,14161548,12095720,12547743.0
1,2,Zion,"Springdale, UT",70,229.1 sqmi,National park,5039835,3591254,4488268.0
2,3,Yellowstone,"Jackson, WY",70,3468.4 sqmi,National park,4860242,3806306,4020288.0
3,4,Grand Canyon,"Grand Canyon Village, AZ",80,1902.0 sqmi,National park,4532677,2897098,5974411.0
4,5,Rocky Mountain,"Estes Park, CO",70,414.8 sqmi,National park,4434848,3305199,4670053.0


In [3]:
parks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   index            72 non-null     int64  
 1   Park             72 non-null     object 
 2   Location         72 non-null     object 
 3   AnnualPassPrice  72 non-null     int64  
 4   Area             72 non-null     object 
 5   ParkType         72 non-null     object 
 6   Year2021         72 non-null     int64  
 7   Year2020         72 non-null     int64  
 8   Year2019         71 non-null     float64
dtypes: float64(1), int64(4), object(4)
memory usage: 5.2+ KB


##### 2: Renaming and Removing Columns

In [4]:
new_column_name = {
    'Year2019': 'Visitors2019',
    'Year2020': 'Visitors2020',
    'Year2021': 'Visitors2021'
}
parks = parks.rename(mapper=new_column_name, axis=1)
parks.head()

Unnamed: 0,index,Park,Location,AnnualPassPrice,Area,ParkType,Visitors2021,Visitors2020,Visitors2019
0,1,Great Smoky Mountains,"Gatlinburg, TN",40,816.3 sqmi,National park,14161548,12095720,12547743.0
1,2,Zion,"Springdale, UT",70,229.1 sqmi,National park,5039835,3591254,4488268.0
2,3,Yellowstone,"Jackson, WY",70,3468.4 sqmi,National park,4860242,3806306,4020288.0
3,4,Grand Canyon,"Grand Canyon Village, AZ",80,1902.0 sqmi,National park,4532677,2897098,5974411.0
4,5,Rocky Mountain,"Estes Park, CO",70,414.8 sqmi,National park,4434848,3305199,4670053.0


In [5]:
parks = parks.drop(labels='index', axis=1)
parks.head()

Unnamed: 0,Park,Location,AnnualPassPrice,Area,ParkType,Visitors2021,Visitors2020,Visitors2019
0,Great Smoky Mountains,"Gatlinburg, TN",40,816.3 sqmi,National park,14161548,12095720,12547743.0
1,Zion,"Springdale, UT",70,229.1 sqmi,National park,5039835,3591254,4488268.0
2,Yellowstone,"Jackson, WY",70,3468.4 sqmi,National park,4860242,3806306,4020288.0
3,Grand Canyon,"Grand Canyon Village, AZ",80,1902.0 sqmi,National park,4532677,2897098,5974411.0
4,Rocky Mountain,"Estes Park, CO",70,414.8 sqmi,National park,4434848,3305199,4670053.0


##### 3: Calculations in Python

In [6]:
grand_canyon_price = parks.loc[parks['Park'] == 'Grand Canyon', 'AnnualPassPrice'].item()
amount_saved = grand_canyon_price * 0.3
new_price = round(grand_canyon_price - amount_saved)
new_price

56

In [7]:
visitors2019, visitors2020 = [float(x) for x in parks.loc[parks['Park'] == 'Zion', ['Visitors2019', 'Visitors2020']].values[0]]
percentchange_2020 = (visitors2020 - visitors2019) / visitors2020 * 100
percentchange_2020

-24.977737581357378

In [8]:
rounded_percent = round(percentchange_2020, 2)
rounded_percent

-24.98

##### 4: Column Calculations in Pandas

In [9]:
parks['Change2020'] = parks['Visitors2020'] - parks['Visitors2019']
parks.loc[:, 'Visitors2020':'Change2020']

Unnamed: 0,Visitors2020,Visitors2019,Change2020
0,12095720,12547743.0,-452023.0
1,3591254,4488268.0,-897014.0
2,3806306,4020288.0,-213982.0
3,2897098,5974411.0,-3077313.0
4,3305199,4670053.0,-1364854.0
...,...,...,...
67,12533,17216.0,-4683.0
68,11185,15766.0,-4581.0
69,2872,10518.0,-7646.0
70,2642,2642.0,0.0


In [10]:
parks['PercentChange2020'] = parks['Change2020'] / parks['Visitors2019'] * 100
parks.head()

Unnamed: 0,Park,Location,AnnualPassPrice,Area,ParkType,Visitors2021,Visitors2020,Visitors2019,Change2020,PercentChange2020
0,Great Smoky Mountains,"Gatlinburg, TN",40,816.3 sqmi,National park,14161548,12095720,12547743.0,-452023.0,-3.602425
1,Zion,"Springdale, UT",70,229.1 sqmi,National park,5039835,3591254,4488268.0,-897014.0,-19.98575
2,Yellowstone,"Jackson, WY",70,3468.4 sqmi,National park,4860242,3806306,4020288.0,-213982.0,-5.322554
3,Grand Canyon,"Grand Canyon Village, AZ",80,1902.0 sqmi,National park,4532677,2897098,5974411.0,-3077313.0,-51.508224
4,Rocky Mountain,"Estes Park, CO",70,414.8 sqmi,National park,4434848,3305199,4670053.0,-1364854.0,-29.225664


In [11]:
parks['PercentChange2020'] = round(parks['PercentChange2020'], 2)
parks['PercentChange2020'].describe()

count    71.000000
mean    -29.499014
std      28.994767
min     -99.140000
25%     -47.405000
50%     -27.130000
75%     -10.295000
max      25.590000
Name: PercentChange2020, dtype: float64

##### 5: Splitting and Combining Columns

In [12]:
parks[['AreaValues', 'Area']] = parks['Area'].str.split(pat=' ', expand=True)
parks[['Area', 'AreaValues']].head()

Unnamed: 0,Area,AreaValues
0,sqmi,816.3
1,sqmi,229.1
2,sqmi,3468.4
3,sqmi,1902.0
4,sqmi,414.8


In [13]:
parks['ParkTitle'] = parks['Park'].str.cat(parks['ParkType'], sep=' ') 
parks[['Park', 'ParkType', 'ParkTitle']].head()

Unnamed: 0,Park,ParkType,ParkTitle
0,Great Smoky Mountains,National park,Great Smoky Mountains National park
1,Zion,National park,Zion National park
2,Yellowstone,National park,Yellowstone National park
3,Grand Canyon,National park,Grand Canyon National park
4,Rocky Mountain,National park,Rocky Mountain National park


In [14]:
parks['State'] = parks['Location'].str.split(pat=',', expand=True)[1]
parks[['Location', 'State']].head()

Unnamed: 0,Location,State
0,"Gatlinburg, TN",TN
1,"Springdale, UT",UT
2,"Jackson, WY",WY
3,"Grand Canyon Village, AZ",AZ
4,"Estes Park, CO",CO


##### 6: Modifying Text Data

In [15]:
parks['ParkTitle'] = parks['ParkTitle'].str.title()
parks['ParkTitle'].head()

0    Great Smoky Mountains National Park
1                     Zion National Park
2              Yellowstone National Park
3             Grand Canyon National Park
4           Rocky Mountain National Park
Name: ParkTitle, dtype: object

In [16]:
parks['State'] = parks['State'].str.strip()
parks['State'].head()

0    TN
1    UT
2    WY
3    AZ
4    CO
Name: State, dtype: object

In [17]:
parks['ParkTitle'] = parks['ParkTitle'].str.replace(pat='&', repl='And', regex=False)
parks.loc[37,:]

Park                                            Great Sand Dunes
Location                                               Mosca, CO
AnnualPassPrice                                               45
Area                                                        sqmi
ParkType                                National park & preserve
Visitors2021                                              602613
Visitors2020                                              461532
Visitors2019                                            527546.0
Change2020                                              -66014.0
PercentChange2020                                         -12.51
AreaValues                                                 232.9
ParkTitle            Great Sand Dunes National Park And Preserve
State                                                         CO
Name: 37, dtype: object

##### 7: Changing Data Types

In [18]:
parks['AreaValues'] = parks['AreaValues'].astype('float64')

In [19]:
parks['State'] = parks['State'].astype('category')

In [20]:
parks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   Park               72 non-null     object  
 1   Location           72 non-null     object  
 2   AnnualPassPrice    72 non-null     int64   
 3   Area               72 non-null     object  
 4   ParkType           72 non-null     object  
 5   Visitors2021       72 non-null     int64   
 6   Visitors2020       72 non-null     int64   
 7   Visitors2019       71 non-null     float64 
 8   Change2020         71 non-null     float64 
 9   PercentChange2020  71 non-null     float64 
 10  AreaValues         72 non-null     float64 
 11  ParkTitle          72 non-null     object  
 12  State              72 non-null     category
dtypes: category(1), float64(4), int64(3), object(5)
memory usage: 8.2+ KB
