In [1]:
# Import dependencies 
import pandas as pd

# Clean climdiv_state_year.csv

In [2]:
# Read in data set
df1 = pd.read_csv('Resources/raw_data/climdiv_state_year.csv')
print(df1.head())
print(df1.shape)

   fips  year       temp      tempc
0     1  1895  61.641667  16.467593
1     1  1896  64.266667  17.925926
2     1  1897  64.191667  17.884259
3     1  1898  62.983333  17.212963
4     1  1899  63.100000  17.277778
(6000, 4)


In [3]:
# Check data types
df1.dtypes

fips       int64
year       int64
temp     float64
tempc    float64
dtype: object

In [4]:
# Check for null values
df1.isnull().sum()

fips     0
year     0
temp     0
tempc    0
dtype: int64

In [5]:
# Create state dictionary
state_dict = {
    1: ['Alabama','AL'], 2: ['Alaska','AK'], 4: ['Arizona','AZ'], 5: ['Arkansas','AR'], 
    6: ['California','CA'], 8: ['Colorado','CO'], 9: ['Connecticut','CT'], 10: ['Delaware','DE'], 
    11: ['District of Columbia','DC'], 12: ['Florida','FL'], 13: ['Georgia','GA'], 15: ['Hawaii','HI'],
    16: ['Idaho','ID'], 17: ['Illinois','IL'], 18: ['Indiana','IN'], 19: ['Iowa','IA'], 
    20: ['Kansas','KS'], 21: ['Kentucky','KY'], 22: ['Louisiana','LA'], 23: ['Maine','ME'],
    24: ['Maryland','MD'], 25: ['Massachusetts','MA'], 26: ['Michigan','MI'], 27: ['Minnesota','MN'],
    28: ['Mississippi','MS'], 29: ['Missouri','MO'], 30: ['Montana','MT'], 31: ['Nebraska','NE'], 
    32: ['Nevada','NV'], 33: ['New Hampshire','NH'], 34: ['New Jersey','NJ'], 35: ['New Mexico','NM'], 
    36: ['New York','NY'], 37: ['North Carolina','NC'], 38: ['North Dakota','ND'], 39: ['Ohio','OH'],
    40: ['Oklahoma','OK'], 41: ['Oregon','OR'], 42: ['Pennsylvania','PA'], 44: ['Rhode Island','RI'], 
    45: ['South Carolina','SC'], 46: ['South Dakota','SD'], 47: ['Tennessee','TN'], 48: ['Texas','TX'], 
    49: ['Utah','UT'], 50: ['Vermont','VT'], 51: ['Virginia','VA'], 53: ['Washington','WA'], 
    54: ['West Virginia','WV'], 55: ['Wisconsin','WI'], 56: ['Wyoming','WY'], 60: ['American Samoa','AS'], 
    66: ['Guam','GU'], 69: ['Northern Mariana Islands','MP'], 72: ['Puerto Rico','PR'], 
    74: ['U.S. Minor Outlying Islands','UM'], 78: ['U.S. Virgin Islands','VI']
}

In [6]:
# Replace fips column with a column for state names and their abbreviations
df1['state'] = df1['fips'].apply(lambda x: state_dict[x][0])
df1['abb'] = df1['fips'].apply(lambda x: state_dict[x][1])
df1.drop('fips', axis = 'columns', inplace = True)
df1

Unnamed: 0,year,temp,tempc,state,abb
0,1895,61.641667,16.467593,Alabama,AL
1,1896,64.266667,17.925926,Alabama,AL
2,1897,64.191667,17.884259,Alabama,AL
3,1898,62.983333,17.212963,Alabama,AL
4,1899,63.100000,17.277778,Alabama,AL
...,...,...,...,...,...
5995,2015,44.158333,6.754630,Wyoming,WY
5996,2016,43.908333,6.615741,Wyoming,WY
5997,2017,43.200000,6.222222,Wyoming,WY
5998,2018,42.408333,5.782407,Wyoming,WY


In [7]:
# Rename Columns
df1.columns = ['year', 'tempf', 'tempc', 'state', 'abb']

# Re-arrange Columns
df1 = df1[['state','abb', 'year', 'tempf', 'tempc']].copy()
df1

Unnamed: 0,state,abb,year,tempf,tempc
0,Alabama,AL,1895,61.641667,16.467593
1,Alabama,AL,1896,64.266667,17.925926
2,Alabama,AL,1897,64.191667,17.884259
3,Alabama,AL,1898,62.983333,17.212963
4,Alabama,AL,1899,63.100000,17.277778
...,...,...,...,...,...
5995,Wyoming,WY,2015,44.158333,6.754630
5996,Wyoming,WY,2016,43.908333,6.615741
5997,Wyoming,WY,2017,43.200000,6.222222
5998,Wyoming,WY,2018,42.408333,5.782407


# Clean climdiv_national_year.csv

In [8]:
# Read in data set
df2 = pd.read_csv('Resources/raw_data/climdiv_national_year.csv')
df2

Unnamed: 0,year,temp,tempc
0,1895,50.337500,10.187500
1,1896,51.993333,11.107407
2,1897,51.556667,10.864815
3,1898,51.431667,10.795370
4,1899,51.009167,10.560648
...,...,...,...
120,2015,54.401667,12.445370
121,2016,54.915000,12.730556
122,2017,54.551667,12.528704
123,2018,53.518333,11.954630


In [9]:
# Check data types
df2.dtypes

year       int64
temp     float64
tempc    float64
dtype: object

In [10]:
# Check for null values
df1.isnull().sum()

state    0
abb      0
year     0
tempf    0
tempc    0
dtype: int64

In [11]:
# Rename Columns
df2.columns = ['year', 'tempf', 'tempc']
df2

Unnamed: 0,year,tempf,tempc
0,1895,50.337500,10.187500
1,1896,51.993333,11.107407
2,1897,51.556667,10.864815
3,1898,51.431667,10.795370
4,1899,51.009167,10.560648
...,...,...,...
120,2015,54.401667,12.445370
121,2016,54.915000,12.730556
122,2017,54.551667,12.528704
123,2018,53.518333,11.954630


# Clean carbon_intensity_of_the_economy.csv

In [12]:
# Read in data set 
# Note: This describes the Carbon intensity of the economy by state (1997 - 2018)
# measured as metric tons of energy-related carbon dioxide per chained 2012 million dollars of GDP
# Source: U.S. Energy Information Administration, State Energy Data System and EIA calculations made for this analysis.
# Note: State-level GDP is provided by the Bureau of Economic Analysis. The earliest available year for this data is 1997
#df2 = pd.read_csv('Resources/raw_data/carbon_intensity_of_the_economy.csv')
#df2.head()