In [38]:
import pandas as pd
import numpy as np

In [39]:
# Input CSVs, keyed by the dataset each one feeds.
raw_csv_paths = {
    'precipitation': 'data-precipitation.csv',
    'temperature': 'data-temperature.csv',
    'population': 'co-est2022-alldata.csv',
    'palmerz': 'data-palmerz.csv',
}

# Read each file into its own raw frame; later cells derive trimmed copies
# from these and never modify them.
precipitation = pd.read_csv(raw_csv_paths['precipitation'])
temperature = pd.read_csv(raw_csv_paths['temperature'])
population = pd.read_csv(raw_csv_paths['population'])
palmerz = pd.read_csv(raw_csv_paths['palmerz'])

## Population Data Processing

In [40]:
# Build the population table: one row per county with its 2022 estimate.
population_columns = {
    'STNAME': 'State',
    'CTYNAME': 'County',
    'POPESTIMATE2022': 'Population',
}

# The raw file mixes state summary rows with county rows. Comparing names
# (STNAME != CTYNAME) removes the summary rows but also removes any county
# whose name equals its state name (e.g. District of Columbia).
if 'SUMLEV' in population.columns:
    # NOTE(review): assumes the Census summary-level code 50 marks county
    # rows -- confirm against the file layout for co-est2022-alldata.csv.
    # to_numeric guards against the code having been read as text ("050").
    is_county_row = pd.to_numeric(population['SUMLEV'], errors='coerce') == 50
else:
    # Fallback for inputs without SUMLEV: original name-based heuristic.
    is_county_row = population['STNAME'] != population['CTYNAME']

pop = (
    population
    .loc[is_county_row, list(population_columns)]
    .rename(columns=population_columns)
)
pop.head()

Unnamed: 0,State,County,Population
1,Alabama,Autauga County,59759
2,Alabama,Baldwin County,246435
3,Alabama,Barbour County,24706
4,Alabama,Bibb County,22005
5,Alabama,Blount County,59512


In [41]:
# Temperature table: keep the join keys and the measurement, then rename the
# generic 'Name'/'Value' columns so they line up with the other tables.
temperature_renames = {'Name': 'County', 'Value': 'Temperature'}
temp = temperature[['State', 'Name', 'Value']].rename(columns=temperature_renames)
temp.head()

Unnamed: 0,State,County,Temperature
0,Alabama,Autauga County,66.1
1,Alabama,Baldwin County,70.1
2,Alabama,Barbour County,66.4
3,Alabama,Bibb County,64.4
4,Alabama,Blount County,62.5


In [42]:
# Palmer-Z table: keep the join keys and the measurement, then rename the
# generic 'Name'/'Value' columns so they line up with the other tables.
palmerz_renames = {'Name': 'County', 'Value': 'Palmer-Z'}
palm = palmerz[['State', 'Name', 'Value']].rename(columns=palmerz_renames)
palm.head()

Unnamed: 0,State,County,Palmer-Z
0,Alabama,Autauga County,-0.26
1,Alabama,Baldwin County,-0.84
2,Alabama,Barbour County,-0.22
3,Alabama,Bibb County,-0.29
4,Alabama,Blount County,0.24


In [43]:
# Precipitation table: keep the join keys and the measurement, then rename the
# generic 'Name'/'Value' columns so they line up with the other tables.
precipitation_renames = {'Name': 'County', 'Value': 'Precipitation'}
preci = precipitation[['State', 'Name', 'Value']].rename(columns=precipitation_renames)
preci.head()

Unnamed: 0,State,County,Precipitation
0,Alabama,Autauga County,51.89
1,Alabama,Baldwin County,50.71
2,Alabama,Barbour County,48.95
3,Alabama,Bibb County,53.22
4,Alabama,Blount County,60.68


In [44]:
# Combine the four per-county tables into one frame keyed by (State, County).
join_keys = ['State', 'County']
dfs = [pop, temp, preci, palm]

# Index copies rather than calling set_index(..., inplace=True): mutating the
# source frames made this cell fail with KeyError on a second run, because
# 'State'/'County' were no longer columns.
indexed_tables = [table.set_index(join_keys) for table in dfs]
population_table, *climate_tables = indexed_tables

# Outer join keeps counties that appear in any of the inputs.
reforestationDB = (
    population_table
    .join(climate_tables, how='outer')
    .reset_index()
)

# Counties missing from an input get 0 for that input's column.
# NOTE(review): 0 is also a plausible real Temperature / Palmer-Z reading, so
# filled-in gaps cannot be told apart from measurements -- confirm this is
# what downstream consumers expect.
reforestationDB = reforestationDB.fillna(0)

reforestationDB.head()

Unnamed: 0,State,County,Population,Temperature,Precipitation,Palmer-Z
0,Alabama,Autauga County,59759.0,66.1,51.89,-0.26
1,Alabama,Baldwin County,246435.0,70.1,50.71,-0.84
2,Alabama,Barbour County,24706.0,66.4,48.95,-0.22
3,Alabama,Bibb County,22005.0,64.4,53.22,-0.29
4,Alabama,Blount County,59512.0,62.5,60.68,0.24


## Edit County Names for Web Scraping


In [45]:
# Turn county names into URL path segments, e.g. "St. Clair County" -> "St-Clair".
county_slug = (
    reforestationDB['County']
    # Plain-text removal of the " County" label, then trim stray whitespace.
    .str.replace(' County', '', regex=False)
    .str.strip()
    # Collapse "<dot><whitespace>" into one hyphen. regex=True must be explicit:
    # when str.replace treats the pattern as a literal string (the default in
    # pandas >= 2.0) it never matches, leaving names like "St.-Clair".
    .str.replace(r'\.\s+', '-', regex=True)
    # Any remaining spaces become hyphens.
    .str.replace(' ', '-', regex=False)
)
reforestationDB['County'] = county_slug
reforestationDB.head()

Unnamed: 0,State,County,Population,Temperature,Precipitation,Palmer-Z
0,Alabama,Autauga,59759.0,66.1,51.89,-0.26
1,Alabama,Baldwin,246435.0,70.1,50.71,-0.84
2,Alabama,Barbour,24706.0,66.4,48.95,-0.22
3,Alabama,Bibb,22005.0,64.4,53.22,-0.29
4,Alabama,Blount,59512.0,62.5,60.68,0.24


In [46]:
# Full state name -> two-letter postal code, used when building URLs.
# Includes District of Columbia so that rows for it do not end up with a
# missing abbreviation (Series.map yields NaN for names absent from the table).
state_abbreviations = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

# Look up each row's abbreviation from its full state name.
reforestationDB['Abbreviation'] = reforestationDB['State'].map(state_abbreviations)



In [47]:
# Put the abbreviation right after the state name; the remaining columns keep
# their existing relative order.
output_columns = [
    'State',
    'Abbreviation',
    'County',
    'Population',
    'Temperature',
    'Precipitation',
    'Palmer-Z',
]
reforestationDB = reforestationDB.loc[:, output_columns]
reforestationDB.head()

Unnamed: 0,State,Abbreviation,County,Population,Temperature,Precipitation,Palmer-Z
0,Alabama,AL,Autauga,59759.0,66.1,51.89,-0.26
1,Alabama,AL,Baldwin,246435.0,70.1,50.71,-0.84
2,Alabama,AL,Barbour,24706.0,66.4,48.95,-0.22
3,Alabama,AL,Bibb,22005.0,64.4,53.22,-0.29
4,Alabama,AL,Blount,59512.0,62.5,60.68,0.24


## Create and Save a New CSV

In [49]:
# Write the combined table to disk; index=False leaves the positional row
# index out of the file.
output_csv_path = 'reforestationDB.csv'
reforestationDB.to_csv(output_csv_path, index=False)