In [1]:
# Import dependencies 
import pandas as pd

# Clean climdiv_state_year.csv

In [2]:
# Load the state/year temperature file from the raw-data folder
df1 = pd.read_csv(filepath_or_buffer='Resources/data_raw/climdiv_state_year.csv')
# Report (rows, columns), then preview the first five records
print(df1.shape)
df1.head(n=5)

(6000, 4)


Unnamed: 0,fips,year,temp,tempc
0,1,1895,61.641667,16.467593
1,1,1896,64.266667,17.925926
2,1,1897,64.191667,17.884259
3,1,1898,62.983333,17.212963
4,1,1899,63.1,17.277778


In [3]:
# Check data types: list the dtype pandas inferred for every column of df1
df1.dtypes

fips       int64
year       int64
temp     float64
tempc    float64
dtype: object

In [4]:
# Count the missing entries in each column of df1
df1.isna().sum()

fips     0
year     0
temp     0
tempc    0
dtype: int64

In [5]:
# Lookup table keyed by FIPS code; each value is the list [state name, abbreviation]
state_dict = {
    fips_code: [state_name, postal_abb]
    for fips_code, state_name, postal_abb in (
        (1, 'Alabama', 'AL'),
        (2, 'Alaska', 'AK'),
        (4, 'Arizona', 'AZ'),
        (5, 'Arkansas', 'AR'),
        (6, 'California', 'CA'),
        (8, 'Colorado', 'CO'),
        (9, 'Connecticut', 'CT'),
        (10, 'Delaware', 'DE'),
        (11, 'District of Columbia', 'DC'),
        (12, 'Florida', 'FL'),
        (13, 'Georgia', 'GA'),
        (15, 'Hawaii', 'HI'),
        (16, 'Idaho', 'ID'),
        (17, 'Illinois', 'IL'),
        (18, 'Indiana', 'IN'),
        (19, 'Iowa', 'IA'),
        (20, 'Kansas', 'KS'),
        (21, 'Kentucky', 'KY'),
        (22, 'Louisiana', 'LA'),
        (23, 'Maine', 'ME'),
        (24, 'Maryland', 'MD'),
        (25, 'Massachusetts', 'MA'),
        (26, 'Michigan', 'MI'),
        (27, 'Minnesota', 'MN'),
        (28, 'Mississippi', 'MS'),
        (29, 'Missouri', 'MO'),
        (30, 'Montana', 'MT'),
        (31, 'Nebraska', 'NE'),
        (32, 'Nevada', 'NV'),
        (33, 'New Hampshire', 'NH'),
        (34, 'New Jersey', 'NJ'),
        (35, 'New Mexico', 'NM'),
        (36, 'New York', 'NY'),
        (37, 'North Carolina', 'NC'),
        (38, 'North Dakota', 'ND'),
        (39, 'Ohio', 'OH'),
        (40, 'Oklahoma', 'OK'),
        (41, 'Oregon', 'OR'),
        (42, 'Pennsylvania', 'PA'),
        (44, 'Rhode Island', 'RI'),
        (45, 'South Carolina', 'SC'),
        (46, 'South Dakota', 'SD'),
        (47, 'Tennessee', 'TN'),
        (48, 'Texas', 'TX'),
        (49, 'Utah', 'UT'),
        (50, 'Vermont', 'VT'),
        (51, 'Virginia', 'VA'),
        (53, 'Washington', 'WA'),
        (54, 'West Virginia', 'WV'),
        (55, 'Wisconsin', 'WI'),
        (56, 'Wyoming', 'WY'),
        (60, 'American Samoa', 'AS'),
        (66, 'Guam', 'GU'),
        (69, 'Northern Mariana Islands', 'MP'),
        (72, 'Puerto Rico', 'PR'),
        (74, 'U.S. Minor Outlying Islands', 'UM'),
        (78, 'U.S. Virgin Islands', 'VI'),
    )
}

In [6]:
# Replace fips column with a column for state names and their abbreviations

# Fail loudly, naming every offending code, if the data contains a FIPS code
# that state_dict does not cover (a plain lookup would only report the first).
unknown_fips = df1.loc[~df1['fips'].isin(list(state_dict)), 'fips'].unique().tolist()
if unknown_fips:
    raise KeyError(f'FIPS codes missing from state_dict: {unknown_fips}')

# Series.map with a dict looks each code up without a per-row Python lambda;
# assign/drop build a new frame instead of mutating df1 in place.
# The new columns are appended after the existing ones, so the resulting order
# is: year, temp, tempc, state, abb.
df1 = (
    df1
    .assign(
        state=df1['fips'].map({code: names[0] for code, names in state_dict.items()}),
        abb=df1['fips'].map({code: names[1] for code, names in state_dict.items()}),
    )
    .drop(columns='fips')
)
df1.head()

Unnamed: 0,year,temp,tempc,state,abb
0,1895,61.641667,16.467593,Alabama,AL
1,1896,64.266667,17.925926,Alabama,AL
2,1897,64.191667,17.884259,Alabama,AL
3,1898,62.983333,17.212963,Alabama,AL
4,1899,63.1,17.277778,Alabama,AL


In [7]:
# Rename Columns
# Rename by label rather than assigning a positional list to df1.columns: a
# positional list silently mislabels the data whenever the column order differs
# from what is expected (for example when this cell is run a second time, after
# the re-arrangement below has already been applied).
df1 = df1.rename(columns={'temp': 'tempf'})

# Re-arrange Columns
df1 = df1[['state', 'abb', 'year', 'tempf', 'tempc']].copy()
print(df1.shape)
df1.head()

(6000, 5)


Unnamed: 0,state,abb,year,tempf,tempc
0,Alabama,AL,1895,61.641667,16.467593
1,Alabama,AL,1896,64.266667,17.925926
2,Alabama,AL,1897,64.191667,17.884259
3,Alabama,AL,1898,62.983333,17.212963
4,Alabama,AL,1899,63.1,17.277778


# Clean climdiv_national_year.csv

In [8]:
# Load the national/year temperature file from the raw-data folder
df2 = pd.read_csv(filepath_or_buffer='Resources/data_raw/climdiv_national_year.csv')
# Report (rows, columns), then preview the first five records
print(df2.shape)
df2.head(n=5)

(125, 3)


Unnamed: 0,year,temp,tempc
0,1895,50.3375,10.1875
1,1896,51.993333,11.107407
2,1897,51.556667,10.864815
3,1898,51.431667,10.79537
4,1899,51.009167,10.560648


In [9]:
# Check data types: list the dtype pandas inferred for every column of df2
df2.dtypes

year       int64
temp     float64
tempc    float64
dtype: object

In [10]:
# Count the missing entries in each column of df2
df2.isna().sum()

year     0
temp     0
tempc    0
dtype: int64

In [11]:
# Rename Columns
# Relabel temp -> tempf by name; unlike a positional list of labels, this
# cannot attach a label to the wrong column if the CSV's column order changes.
df2 = df2.rename(columns={'temp': 'tempf'})
print(df2.shape)
df2.head()

(125, 3)


Unnamed: 0,year,tempf,tempc
0,1895,50.3375,10.1875
1,1896,51.993333,11.107407
2,1897,51.556667,10.864815
3,1898,51.431667,10.79537
4,1899,51.009167,10.560648


# Clean model_state.csv

In [12]:
# Load model_state.csv from the raw-data folder
df3 = pd.read_csv(filepath_or_buffer='Resources/data_raw/model_state.csv')
# Report (rows, columns), then preview the first five records
print(df3.shape)
df3.head(n=5)

(48, 10)


Unnamed: 0,fips,Fall,Spring,Summer,Winter,max_warming_season,Annual,STUSAB,STATE_NAME,STATENS
0,1,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
1,4,1.203951,1.38448,1.274455,1.388388,Winter,1.31988,AZ,Arizona,1779777
2,5,-0.04254,0.266399,0.058596,0.532247,Winter,0.214074,AR,Arkansas,68085
3,6,1.570921,1.449242,1.478335,1.41243,Fall,1.480561,CA,California,1779778
4,8,1.055309,1.43691,1.367845,1.838758,Winter,1.438589,CO,Colorado,1779779


In [13]:
# Check data types: list the dtype pandas inferred for every column of df3
df3.dtypes

fips                    int64
Fall                  float64
Spring                float64
Summer                float64
Winter                float64
max_warming_season     object
Annual                float64
STUSAB                 object
STATE_NAME             object
STATENS                 int64
dtype: object

In [14]:
# Count the missing entries in each column of df3
df3.isna().sum()

fips                  0
Fall                  0
Spring                0
Summer                0
Winter                0
max_warming_season    0
Annual                0
STUSAB                0
STATE_NAME            0
STATENS               0
dtype: int64

In [15]:
# Keep the state name/abbreviation, the annual and seasonal value columns and
# max_warming_season (fips and STATENS are left out), then lowercase the labels
df3 = (
    df3[['STATE_NAME', 'STUSAB', 'Annual', 'Fall', 'Spring', 'Summer', 'Winter', 'max_warming_season']]
    .rename(columns={
        'STATE_NAME': 'name',
        'STUSAB': 'abb',
        'Annual': 'annual',
        'Fall': 'fall',
        'Spring': 'spring',
        'Summer': 'summer',
        'Winter': 'winter',
    })
)
print(df3.shape)
df3.head(n=5)

(48, 8)


Unnamed: 0,name,abb,annual,fall,spring,summer,winter,max_warming_season
0,Alabama,AL,-0.035048,-0.195668,-0.105862,-0.325009,0.458526,Winter
1,Arizona,AZ,1.31988,1.203951,1.38448,1.274455,1.388388,Winter
2,Arkansas,AR,0.214074,-0.04254,0.266399,0.058596,0.532247,Winter
3,California,CA,1.480561,1.570921,1.449242,1.478335,1.41243,Fall
4,Colorado,CO,1.438589,1.055309,1.43691,1.367845,1.838758,Winter


In [16]:
# Read in data set
# Note: This describes the carbon intensity of the economy by state (1997 - 2018),
# measured as metric tons of energy-related carbon dioxide per chained 2012 million dollars of GDP.
# Source: U.S. Energy Information Administration, State Energy Data System and EIA calculations made for this analysis.
# Note: State-level GDP is provided by the Bureau of Economic Analysis. The earliest available year for this data is 1997.
# NOTE(review): the load below is disabled. Before re-enabling it, confirm the path
# ('Resources/raw_data/' here vs 'Resources/data_raw/' in the other read_csv calls)
# and be aware that it assigns to df2, which already holds the national temperature data.
#df2 = pd.read_csv('Resources/raw_data/carbon_intensity_of_the_economy.csv')
#df2.head()