In [1]:
import pandas as pd
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

In [2]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": [32.806671, 61.370716, 33.729759, 34.969704, 36.116203, 39.059811, 41.597782, 39.318523, 38.897438,
                 27.766279, 33.040619, 21.094318, 44.240459, 40.349457, 39.849426, 42.011539, 38.5266, 37.66814,
                 31.169546, 44.693947, 39.063946, 42.230171, 43.326618, 45.694454, 32.741646, 38.456085, 46.921925,
                 41.12537, 38.313515, 43.452492, 40.298904, 34.840515, 42.165726, 35.630066, 47.528912, 40.388783,
                 35.565342, 44.572021, 40.590752, 41.680893, 33.856892, 44.299782, 35.747845, 31.054487, 40.150032,
                 44.045876, 37.769337, 47.400902, 38.491226, 44.268543, 42.755966],
    "longitude": [-86.79113, -152.404419, -111.431221, -92.373123, -119.681564, -105.311104, -72.755371, -75.507141,
                  -77.026817, -81.686783, -83.643074, -157.498337, -114.478828, -88.986137, -86.258278, -93.210526,
                  -96.726486, -84.670067, -91.867805, -69.381927, -76.802101, -71.530106, -84.536095, -93.900192,
                  -89.678696, -92.288368, -110.454353, -98.268082, -117.055374, -71.563896, -74.521011, -106.248482,
                  -74.948051, -79.806419, -99.784012, -82.764915, -96.928917, -122.070938, -77.209755, -71.51178,
                  -80.945007, -99.438828, -86.692345, -97.563461, -111.862434, -72.710686, -78.169968, -121.490494,
                  -80.954453, -89.616508, -107.30249],
    "start_date": "2000-01-31",
    "end_date": "2023-12-31",
    "hourly": "temperature_2m",
    "daily": ["temperature_2m_max", "temperature_2m_min", "daylight_duration", "rain_sum", "snowfall_sum"],
    "temperature_unit": "fahrenheit",
    "wind_speed_unit": "mph",
    "precipitation_unit": "inch"
}
responses = openmeteo.weather_api(url, params=params)

# Initialize lists to hold hourly and daily data
hourly_dataframes = []
daily_dataframes = []

# Loop through all responses to process data for each location
for response in responses:
    hourly = response.Hourly()
    hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "latitude": response.Latitude(),
        "longitude": response.Longitude(),
        "temperature_2m": hourly_temperature_2m
    }
    hourly_dataframes.append(pd.DataFrame(data=hourly_data))

    daily = response.Daily()
    daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
    daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
    daily_daylight_duration = daily.Variables(2).ValuesAsNumpy()
    daily_rain_sum = daily.Variables(3).ValuesAsNumpy()
    daily_snowfall_sum = daily.Variables(4).ValuesAsNumpy()

    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "latitude": response.Latitude(),
        "longitude": response.Longitude(),
        "temp_max": daily_temperature_2m_max,
        "temp_min": daily_temperature_2m_min,
        
        "daylight": daily_daylight_duration,
        "rain_sum": daily_rain_sum,
        "snowfall_sum": daily_snowfall_sum


    }
    daily_dataframes.append(pd.DataFrame(data=daily_data))

# Concatenate hourly and daily dataframes for all locations
hourly_dataframe = pd.concat(hourly_dataframes, ignore_index=True)
daily_dataframe = pd.concat(daily_dataframes, ignore_index=True)

# print(hourly_dataframe)
# print(daily_dataframe)


In [3]:
daily_dataframe

Unnamed: 0,date,latitude,longitude,temp_max,temp_min,daylight,rain_sum,snowfall_sum
0,2000-01-31 00:00:00+00:00,32.794376,-86.809296,42.799099,27.679100,38088.992188,0.0,0.0
1,2000-02-01 00:00:00+00:00,32.794376,-86.809296,51.619099,28.489100,38185.601562,0.0,0.0
2,2000-02-02 00:00:00+00:00,32.794376,-86.809296,49.189098,29.569099,38283.316406,0.0,0.0
3,2000-02-03 00:00:00+00:00,32.794376,-86.809296,53.599098,27.589100,38382.019531,0.0,0.0
4,2000-02-04 00:00:00+00:00,32.794376,-86.809296,52.429100,35.869099,38481.609375,0.0,0.0
...,...,...,...,...,...,...,...,...
445531,2023-12-27 00:00:00+00:00,42.776798,-107.440826,20.932699,6.802700,32558.552734,0.0,0.0
445532,2023-12-28 00:00:00+00:00,42.776798,-107.440826,19.042700,6.982700,32585.277344,0.0,0.0
445533,2023-12-29 00:00:00+00:00,42.776798,-107.440826,22.372700,12.202702,32616.460938,0.0,0.0
445534,2023-12-30 00:00:00+00:00,42.776798,-107.440826,25.522699,15.622700,32652.011719,0.0,0.0


In [4]:
daily_dataframe['lat'] = daily_dataframe['latitude'].astype(str).str[:2]
daily_dataframe['lon'] = daily_dataframe['longitude'].astype(str).str[:3]
daily_dataframe['concat'] = daily_dataframe['lat']+daily_dataframe['lon']
daily_dataframe

Unnamed: 0,date,latitude,longitude,temp_max,temp_min,daylight,rain_sum,snowfall_sum,lat,lon,concat
0,2000-01-31 00:00:00+00:00,32.794376,-86.809296,42.799099,27.679100,38088.992188,0.0,0.0,32,-86,32-86
1,2000-02-01 00:00:00+00:00,32.794376,-86.809296,51.619099,28.489100,38185.601562,0.0,0.0,32,-86,32-86
2,2000-02-02 00:00:00+00:00,32.794376,-86.809296,49.189098,29.569099,38283.316406,0.0,0.0,32,-86,32-86
3,2000-02-03 00:00:00+00:00,32.794376,-86.809296,53.599098,27.589100,38382.019531,0.0,0.0,32,-86,32-86
4,2000-02-04 00:00:00+00:00,32.794376,-86.809296,52.429100,35.869099,38481.609375,0.0,0.0,32,-86,32-86
...,...,...,...,...,...,...,...,...,...,...,...
445531,2023-12-27 00:00:00+00:00,42.776798,-107.440826,20.932699,6.802700,32558.552734,0.0,0.0,42,-10,42-10
445532,2023-12-28 00:00:00+00:00,42.776798,-107.440826,19.042700,6.982700,32585.277344,0.0,0.0,42,-10,42-10
445533,2023-12-29 00:00:00+00:00,42.776798,-107.440826,22.372700,12.202702,32616.460938,0.0,0.0,42,-10,42-10
445534,2023-12-30 00:00:00+00:00,42.776798,-107.440826,25.522699,15.622700,32652.011719,0.0,0.0,42,-10,42-10


In [5]:
state_merge = pd.read_csv("C:/Github/Group3_Project4/pete files/pete_resources/state lat lon.csv")
state_merge.head()

Unnamed: 0,State,Latitude,Longitude,reduced,reduced.1,concat,region,sub-region,country
0,Alabama,32.806671,-86.79113,32,-86,32-86,South,East South Central,US
1,Alaska,61.370716,-152.404419,61,-15,61-15,West,Pacific,US
2,Arizona,33.729759,-111.431221,33,-11,33-11,West,Mountain,US
3,Arkansas,34.969704,-92.373123,34,-92,34-92,South,West South Central,US
4,California,36.116203,-119.681564,36,-11,36-11,West,Pacific,US


In [6]:
merged_weather = pd.merge(daily_dataframe, state_merge, on = 'concat', how = 'left')
merged_weather.head()

Unnamed: 0,date,latitude,longitude,temp_max,temp_min,daylight,rain_sum,snowfall_sum,lat,lon,concat,State,Latitude,Longitude,reduced,reduced.1,region,sub-region,country
0,2000-01-31 00:00:00+00:00,32.794376,-86.809296,42.799099,27.6791,38088.992188,0.0,0.0,32,-86,32-86,Alabama,32.806671,-86.79113,32,-86,South,East South Central,US
1,2000-02-01 00:00:00+00:00,32.794376,-86.809296,51.619099,28.4891,38185.601562,0.0,0.0,32,-86,32-86,Alabama,32.806671,-86.79113,32,-86,South,East South Central,US
2,2000-02-02 00:00:00+00:00,32.794376,-86.809296,49.189098,29.569099,38283.316406,0.0,0.0,32,-86,32-86,Alabama,32.806671,-86.79113,32,-86,South,East South Central,US
3,2000-02-03 00:00:00+00:00,32.794376,-86.809296,53.599098,27.5891,38382.019531,0.0,0.0,32,-86,32-86,Alabama,32.806671,-86.79113,32,-86,South,East South Central,US
4,2000-02-04 00:00:00+00:00,32.794376,-86.809296,52.4291,35.869099,38481.609375,0.0,0.0,32,-86,32-86,Alabama,32.806671,-86.79113,32,-86,South,East South Central,US


In [7]:
weather = merged_weather.drop(columns = ['lat', 'lon', 'concat', 'Latitude', 'Longitude', 'reduced','reduced.1'])
# weather.set_index('State', inplace=True)
weather


Unnamed: 0,date,latitude,longitude,temp_max,temp_min,daylight,rain_sum,snowfall_sum,State,region,sub-region,country
0,2000-01-31 00:00:00+00:00,32.794376,-86.809296,42.799099,27.679100,38088.992188,0.0,0.0,Alabama,South,East South Central,US
1,2000-02-01 00:00:00+00:00,32.794376,-86.809296,51.619099,28.489100,38185.601562,0.0,0.0,Alabama,South,East South Central,US
2,2000-02-02 00:00:00+00:00,32.794376,-86.809296,49.189098,29.569099,38283.316406,0.0,0.0,Alabama,South,East South Central,US
3,2000-02-03 00:00:00+00:00,32.794376,-86.809296,53.599098,27.589100,38382.019531,0.0,0.0,Alabama,South,East South Central,US
4,2000-02-04 00:00:00+00:00,32.794376,-86.809296,52.429100,35.869099,38481.609375,0.0,0.0,Alabama,South,East South Central,US
...,...,...,...,...,...,...,...,...,...,...,...,...
445531,2023-12-27 00:00:00+00:00,42.776798,-107.440826,20.932699,6.802700,32558.552734,0.0,0.0,Wyoming,West,Mountain,US
445532,2023-12-28 00:00:00+00:00,42.776798,-107.440826,19.042700,6.982700,32585.277344,0.0,0.0,Wyoming,West,Mountain,US
445533,2023-12-29 00:00:00+00:00,42.776798,-107.440826,22.372700,12.202702,32616.460938,0.0,0.0,Wyoming,West,Mountain,US
445534,2023-12-30 00:00:00+00:00,42.776798,-107.440826,25.522699,15.622700,32652.011719,0.0,0.0,Wyoming,West,Mountain,US


In [8]:
# separate out the month and day from the date column
weather['month'] = weather['date'].dt.month
weather['year'] = weather['date'].dt.year
weather['Date'] =  weather['month'].astype(str).str.zfill(2) + '-' + weather['year'].astype(str)
weather['Date'] = pd.to_datetime(weather['Date'])
changed_date_weather = weather.drop(columns = ['date']) #keep lat and lon for tableau mapping?
changed_date_weather

  weather['Date'] = pd.to_datetime(weather['Date'])


Unnamed: 0,latitude,longitude,temp_max,temp_min,daylight,rain_sum,snowfall_sum,State,region,sub-region,country,month,year,Date
0,32.794376,-86.809296,42.799099,27.679100,38088.992188,0.0,0.0,Alabama,South,East South Central,US,1,2000,2000-01-01
1,32.794376,-86.809296,51.619099,28.489100,38185.601562,0.0,0.0,Alabama,South,East South Central,US,2,2000,2000-02-01
2,32.794376,-86.809296,49.189098,29.569099,38283.316406,0.0,0.0,Alabama,South,East South Central,US,2,2000,2000-02-01
3,32.794376,-86.809296,53.599098,27.589100,38382.019531,0.0,0.0,Alabama,South,East South Central,US,2,2000,2000-02-01
4,32.794376,-86.809296,52.429100,35.869099,38481.609375,0.0,0.0,Alabama,South,East South Central,US,2,2000,2000-02-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445531,42.776798,-107.440826,20.932699,6.802700,32558.552734,0.0,0.0,Wyoming,West,Mountain,US,12,2023,2023-12-01
445532,42.776798,-107.440826,19.042700,6.982700,32585.277344,0.0,0.0,Wyoming,West,Mountain,US,12,2023,2023-12-01
445533,42.776798,-107.440826,22.372700,12.202702,32616.460938,0.0,0.0,Wyoming,West,Mountain,US,12,2023,2023-12-01
445534,42.776798,-107.440826,25.522699,15.622700,32652.011719,0.0,0.0,Wyoming,West,Mountain,US,12,2023,2023-12-01


In [9]:
# # save dataframe - all years
# changed_date_weather.to_csv("pete_resources/2000-2023_weather.csv")

In [10]:
# Annual averages
annual_weather = changed_date_weather.drop(columns = ['Date'])

In [11]:
annual_weather['max_temp_avg'] = annual_weather.groupby(['year', 'month'])['temp_max'].transform('mean')
annual_weather['min_temp_avg'] = annual_weather.groupby(['year', 'month'])['temp_min'].transform('mean')
annual_weather['daylight_hr_avg'] = annual_weather.groupby(['year', 'month'])['daylight'].transform('mean')/3600
annual_weather['rain_avg'] = annual_weather.groupby(['year', 'month'])['rain_sum'].transform('mean')
annual_weather['snowfall_avg'] = annual_weather.groupby(['year', 'month'])['snowfall_sum'].transform('mean')

annual_weather = annual_weather.drop(columns = ['temp_max', 'temp_min','daylight','rain_sum','snowfall_sum'])
weather_no_dups = annual_weather.drop_duplicates()
weather_no_dups

Unnamed: 0,latitude,longitude,State,region,sub-region,country,month,year,max_temp_avg,min_temp_avg,daylight_hr_avg,rain_avg,snowfall_avg
0,32.794376,-86.809296,Alabama,South,East South Central,US,1,2000,36.555183,21.022242,12.424892,0.080413,0.087108
1,32.794376,-86.809296,Alabama,South,East South Central,US,2,2000,47.419319,29.716642,12.424892,0.080413,0.087108
30,32.794376,-86.809296,Alabama,South,East South Central,US,3,2000,55.031422,36.188179,12.424892,0.080413,0.087108
61,32.794376,-86.809296,Alabama,South,East South Central,US,4,2000,61.308537,41.842239,12.424892,0.080413,0.087108
91,32.794376,-86.809296,Alabama,South,East South Central,US,5,2000,72.299355,53.421158,12.424892,0.080413,0.087108
...,...,...,...,...,...,...,...,...,...,...,...,...,...
445383,42.776798,-107.440826,Wyoming,West,Mountain,US,8,2023,83.375290,64.406975,12.205958,0.095275,0.107491
445414,42.776798,-107.440826,Wyoming,West,Mountain,US,9,2023,77.181503,58.523502,12.205958,0.095275,0.107491
445444,42.776798,-107.440826,Wyoming,West,Mountain,US,10,2023,65.951889,48.432285,12.205958,0.095275,0.107491
445475,42.776798,-107.440826,Wyoming,West,Mountain,US,11,2023,53.320210,36.599918,12.205958,0.095275,0.107491


In [12]:
weather_no_dups.to_csv("pete_resources/annual_weather_avgs_2000-2023.csv")

In [13]:
# monthly_weather = changed_date_weather.drop(columns = ['Date'])

In [14]:
# monthly_weather['max_temp_avg'] = monthly_weather.groupby('year')['temp_max'].transform('mean')
# monthly_weather['min_temp_avg'] = monthly_weather.groupby('year')['temp_min'].transform('mean')
# monthly_weather['daylight_hr_avg'] = monthly_weather.groupby('year')['daylight'].transform('mean')/3600
# monthly_weather['rain_avg'] = monthly_weather.groupby('year')['rain_sum'].transform('mean')
# monthly_weather['snowfall_avg'] = monthly_weather.groupby('year')['snowfall_sum'].transform('mean')

# monthly_weather = monthly_weather.drop(columns = ['temp_max', 'temp_min','daylight','rain_sum','snowfall_sum'])
# mo_weather_no_dups = monthly_weather.drop_duplicates()
# mo_weather_no_dups.head()

In [15]:
# mo_weather_no_dups.to_csv("pete_resources/monthly_weather_avgs_2000-2023.csv")

In [16]:
#TEN YEAR FILTER 
year_filter_start = 2010
year_filter_end = 2020 

month_filter_start = 1
month_filter_end = 12

year_begin = (weather_no_dups['year'] >= year_filter_start) & (weather_no_dups['month'] >= month_filter_start)

year_end = (weather_no_dups['year'] <= year_filter_end) & (weather_no_dups['month'] <= month_filter_end)

# Apply the filters
ten_yr_filtered_weather = weather_no_dups.loc[year_begin & year_end]

ten_yr_filtered_weather.head()

Unnamed: 0,latitude,longitude,State,region,sub-region,country,month,year,max_temp_avg,min_temp_avg,daylight_hr_avg,rain_avg,snowfall_avg
3623,32.794376,-86.809296,Alabama,South,East South Central,US,1,2010,36.937668,22.684311,12.206028,0.085311,0.11215
3654,32.794376,-86.809296,Alabama,South,East South Central,US,2,2010,38.076675,23.072702,12.206028,0.085311,0.11215
3682,32.794376,-86.809296,Alabama,South,East South Central,US,3,2010,52.501175,34.194633,12.206028,0.085311,0.11215
3713,32.794376,-86.809296,Alabama,South,East South Central,US,4,2010,64.333412,44.25436,12.206028,0.085311,0.11215
3743,32.794376,-86.809296,Alabama,South,East South Central,US,5,2010,71.213722,52.538177,12.206028,0.085311,0.11215


In [17]:
#FIVE YEAR FILTER 
year_filter_start = 2016
year_filter_end = 2020

month_filter_start = 1
month_filter_end = 12

year_begin = (weather_no_dups['year'] >= year_filter_start) & (weather_no_dups['month'] >= month_filter_start)

year_end = (weather_no_dups['year'] <= year_filter_end) & (weather_no_dups['month'] <= month_filter_end)

# Apply the filters
five_yr_filtered_weather = weather_no_dups.loc[year_begin & year_end]

five_yr_filtered_weather.head()

Unnamed: 0,latitude,longitude,State,region,sub-region,country,month,year,max_temp_avg,min_temp_avg,daylight_hr_avg,rain_avg,snowfall_avg
5814,32.794376,-86.809296,Alabama,South,East South Central,US,1,2016,39.274536,24.255241,12.19835,0.086491,0.093635
5845,32.794376,-86.809296,Alabama,South,East South Central,US,2,2016,45.903133,29.12705,12.19835,0.086491,0.093635
5874,32.794376,-86.809296,Alabama,South,East South Central,US,3,2016,56.460552,38.427082,12.19835,0.086491,0.093635
5905,32.794376,-86.809296,Alabama,South,East South Central,US,4,2016,62.170242,43.301945,12.19835,0.086491,0.093635
5935,32.794376,-86.809296,Alabama,South,East South Central,US,5,2016,69.735977,51.51915,12.19835,0.086491,0.093635


In [18]:
building = pd.read_csv("C:/Github/Group3_Project4/pete files/pete_resources/regional_home_data.csv")
building = building.rename(columns = {'Region': 'region'})
building.head()

Unnamed: 0.1,Unnamed: 0,region,Total Residential Buildings (millions),Before 1950 (millions),1950-1959 (millions),1960-1969 (millions),1970-1979 (millions),1980-1989 (millions),1990-1999 (millions),2000-2009 (millions),2010-2015 (millions),2016-2020 (millions)
0,0,All homes,123,20,12,12,18,16,17,16,5,4
1,1,Northeast,21,7,3,2,2,2,1,1,0,0
2,2,New England,5,1,0,0,0,0,0,0,0,0
3,3,Middle Atlantic,16,5,2,1,1,1,1,1,0,0
4,4,Midwest,27,6,3,2,4,2,3,2,0,0


In [19]:
building_and_weather = pd.merge(ten_yr_filtered_weather, building, on = 'region', how = 'left')
building_and_weather.head()

Unnamed: 0,latitude,longitude,State,region,sub-region,country,month,year,max_temp_avg,min_temp_avg,...,Total Residential Buildings (millions),Before 1950 (millions),1950-1959 (millions),1960-1969 (millions),1970-1979 (millions),1980-1989 (millions),1990-1999 (millions),2000-2009 (millions),2010-2015 (millions),2016-2020 (millions)
0,32.794376,-86.809296,Alabama,South,East South Central,US,1,2010,36.937668,22.684311,...,46,3,3,4,6,7,7,7,2,2
1,32.794376,-86.809296,Alabama,South,East South Central,US,2,2010,38.076675,23.072702,...,46,3,3,4,6,7,7,7,2,2
2,32.794376,-86.809296,Alabama,South,East South Central,US,3,2010,52.501175,34.194633,...,46,3,3,4,6,7,7,7,2,2
3,32.794376,-86.809296,Alabama,South,East South Central,US,4,2010,64.333412,44.25436,...,46,3,3,4,6,7,7,7,2,2
4,32.794376,-86.809296,Alabama,South,East South Central,US,5,2010,71.213722,52.538177,...,46,3,3,4,6,7,7,7,2,2


In [20]:
ten_yr_building_and_weather = building_and_weather.drop(columns = ['Unnamed: 0', 'Total Residential Buildings (millions)', 'Before 1950 (millions)', 
                                                             '1950-1959 (millions)', '1960-1969 (millions)', '1970-1979 (millions)',
                                                             '1980-1989 (millions)', '1990-1999 (millions)', '2000-2009 (millions)'])
ten_yr_building_and_weather.head()

Unnamed: 0,latitude,longitude,State,region,sub-region,country,month,year,max_temp_avg,min_temp_avg,daylight_hr_avg,rain_avg,snowfall_avg,2010-2015 (millions),2016-2020 (millions)
0,32.794376,-86.809296,Alabama,South,East South Central,US,1,2010,36.937668,22.684311,12.206028,0.085311,0.11215,2,2
1,32.794376,-86.809296,Alabama,South,East South Central,US,2,2010,38.076675,23.072702,12.206028,0.085311,0.11215,2,2
2,32.794376,-86.809296,Alabama,South,East South Central,US,3,2010,52.501175,34.194633,12.206028,0.085311,0.11215,2,2
3,32.794376,-86.809296,Alabama,South,East South Central,US,4,2010,64.333412,44.25436,12.206028,0.085311,0.11215,2,2
4,32.794376,-86.809296,Alabama,South,East South Central,US,5,2010,71.213722,52.538177,12.206028,0.085311,0.11215,2,2


In [21]:
ten_yr_building_and_weather.to_csv("pete_resources/10yr_home_and_weather.csv")

In [22]:
five_building_and_weather = pd.merge(five_yr_filtered_weather, building, on = 'region', how = 'left')
five_building_and_weather.head()

Unnamed: 0,latitude,longitude,State,region,sub-region,country,month,year,max_temp_avg,min_temp_avg,...,Total Residential Buildings (millions),Before 1950 (millions),1950-1959 (millions),1960-1969 (millions),1970-1979 (millions),1980-1989 (millions),1990-1999 (millions),2000-2009 (millions),2010-2015 (millions),2016-2020 (millions)
0,32.794376,-86.809296,Alabama,South,East South Central,US,1,2016,39.274536,24.255241,...,46,3,3,4,6,7,7,7,2,2
1,32.794376,-86.809296,Alabama,South,East South Central,US,2,2016,45.903133,29.12705,...,46,3,3,4,6,7,7,7,2,2
2,32.794376,-86.809296,Alabama,South,East South Central,US,3,2016,56.460552,38.427082,...,46,3,3,4,6,7,7,7,2,2
3,32.794376,-86.809296,Alabama,South,East South Central,US,4,2016,62.170242,43.301945,...,46,3,3,4,6,7,7,7,2,2
4,32.794376,-86.809296,Alabama,South,East South Central,US,5,2016,69.735977,51.51915,...,46,3,3,4,6,7,7,7,2,2


In [23]:
five_building_and_weather = five_building_and_weather.drop(columns = ['Unnamed: 0', 'Total Residential Buildings (millions)', 'Before 1950 (millions)', 
                                                             '1950-1959 (millions)', '1960-1969 (millions)', '1970-1979 (millions)',
                                                             '1980-1989 (millions)', '1990-1999 (millions)', '2000-2009 (millions)',
                                                              '2010-2015 (millions)'])
five_building_and_weather.head()

Unnamed: 0,latitude,longitude,State,region,sub-region,country,month,year,max_temp_avg,min_temp_avg,daylight_hr_avg,rain_avg,snowfall_avg,2016-2020 (millions)
0,32.794376,-86.809296,Alabama,South,East South Central,US,1,2016,39.274536,24.255241,12.19835,0.086491,0.093635,2
1,32.794376,-86.809296,Alabama,South,East South Central,US,2,2016,45.903133,29.12705,12.19835,0.086491,0.093635,2
2,32.794376,-86.809296,Alabama,South,East South Central,US,3,2016,56.460552,38.427082,12.19835,0.086491,0.093635,2
3,32.794376,-86.809296,Alabama,South,East South Central,US,4,2016,62.170242,43.301945,12.19835,0.086491,0.093635,2
4,32.794376,-86.809296,Alabama,South,East South Central,US,5,2016,69.735977,51.51915,12.19835,0.086491,0.093635,2


In [24]:
five_building_and_weather.to_csv("pete_resources/5yr_home_and_weather.csv")

In [26]:
all_building_weather = pd.merge(weather_no_dups, building, on = 'region', how = 'left')

all_building_weather = all_building_weather.drop(columns = ['Unnamed: 0', 'Total Residential Buildings (millions)', 'Before 1950 (millions)', 
                                                             '1950-1959 (millions)', '1960-1969 (millions)', '1970-1979 (millions)',
                                                             '1980-1989 (millions)', '1990-1999 (millions)'])
all_building_weather.head()

Unnamed: 0,latitude,longitude,State,region,sub-region,country,month,year,max_temp_avg,min_temp_avg,daylight_hr_avg,rain_avg,snowfall_avg,2000-2009 (millions),2010-2015 (millions),2016-2020 (millions)
0,32.794376,-86.809296,Alabama,South,East South Central,US,1,2000,36.555183,21.022242,12.424892,0.080413,0.087108,7,2,2
1,32.794376,-86.809296,Alabama,South,East South Central,US,2,2000,47.419319,29.716642,12.424892,0.080413,0.087108,7,2,2
2,32.794376,-86.809296,Alabama,South,East South Central,US,3,2000,55.031422,36.188179,12.424892,0.080413,0.087108,7,2,2
3,32.794376,-86.809296,Alabama,South,East South Central,US,4,2000,61.308537,41.842239,12.424892,0.080413,0.087108,7,2,2
4,32.794376,-86.809296,Alabama,South,East South Central,US,5,2000,72.299355,53.421158,12.424892,0.080413,0.087108,7,2,2


In [27]:
all_building_weather.to_csv("pete_resources/all_home_and_weather.csv")