# Imports

In [1]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
# https://towardsdatascience.com/calculating-distance-between-two-geolocations-in-python-26ad3afe287b
import haversine as hs
# https://www.geeksforgeeks.org/read-multiple-csv-files-into-separate-dataframes-in-python/
import os
# https://www.statology.org/pandas-merge-multiple-dataframes/
from functools import reduce

import warnings
warnings.simplefilter("ignore")

In [2]:
# coordinates (lat/lon columns) used as the source rows by every extract_feature call in this notebook
test = pd.read_csv('../data/monthly_test_data/latlong_1k.csv')

In [3]:
# preview the first rows to check the column names
test.head()

Unnamed: 0,lat,lon
0,29.93341,-107.66498
1,31.82435,-107.78995
2,40.26557,-95.42612
3,37.58588,-95.80647
4,35.4573,-93.4763


# Functions

## Variable Extraction by Lat/Lon

In [4]:
def extract_feature(new_column_name, source_df_clean, extract_df, extract_df_lat_name, 
                    extract_df_lon_name, extract_df_value_column_name, lat_range = .1, lon_range = .1,
                    source_lat_name = 'lat', source_lon_name = 'lon'):

    '''
    Attaches to every row of the source dataset the value of the nearest point of a second dataset.

    For each source row, the candidates are the extract_df rows whose latitude and longitude both lie
    within +/- lat_range and +/- lon_range of the source coordinates (rectangular search box, bounds
    inclusive). Among the candidates, the one with the smallest haversine (great-circle) distance is
    selected and its value is copied. Source rows with no candidate in the box receive NaN.

    Coordinates are expected in decimal degrees. The search box is a plain numeric window, so it does
    not wrap around the +/-180 longitude line.

    input:
        new_column_name: the name you want to give to the new column
        source_df_clean: dataset to pull search lat/lon from and apply new values column (test)
        extract_df: dataset to extract values from (any index is accepted)
        extract_df_lat_name: name of latitude column from extract_df
        extract_df_lon_name: name of longitude column from extract_df
        extract_df_value_column_name: name of the column where the values to extract are located
        lat_range = half-width of the latitude search window, default is .1 unless specified
        lon_range = half-width of the longitude search window, default is .1 unless specified
        source_lat_name = name of latitude column in source_df_clean, default is 'lat'
        source_lon_name = name of longitude column in source_df_clean, default is 'lon'

    output:
        copy of source_df_clean with the new column appended - assign the function call to a variable
        to get the new df; neither input dataframe is modified
    '''

    source_df = source_df_clean.copy()

    # pull the needed columns out once as plain numpy arrays: every lookup below is positional, so the
    # result does not depend on whatever index extract_df happens to carry
    candidate_lats = extract_df[extract_df_lat_name].to_numpy(dtype = float)
    candidate_lons = extract_df[extract_df_lon_name].to_numpy(dtype = float)
    candidate_values = extract_df[extract_df_value_column_name].to_numpy()
    candidate_lats_rad = np.radians(candidate_lats)
    candidate_lons_rad = np.radians(candidate_lons)

    source_lats = source_df[source_lat_name].to_numpy(dtype = float)
    source_lons = source_df[source_lon_name].to_numpy(dtype = float)

    # one extracted value (or NaN) per source row, in row order
    extracted_values_list = []
    for lat_search, lon_search in zip(source_lats, source_lons):

        # candidates inside the lat/lon search box of the current source row
        in_box = (
            (candidate_lats >= lat_search - lat_range) & (candidate_lats <= lat_search + lat_range) &
            (candidate_lons >= lon_search - lon_range) & (candidate_lons <= lon_search + lon_range)
        )
        box_positions = np.flatnonzero(in_box)
        if box_positions.size == 0:
            extracted_values_list.append(np.nan)
            continue

        # haversine term for every candidate in the box; the great-circle distance grows
        # monotonically with this term, so ranking by it is the same as ranking by distance
        lat_search_rad = np.radians(lat_search)
        half_dlat = (candidate_lats_rad[box_positions] - lat_search_rad) / 2
        half_dlon = (candidate_lons_rad[box_positions] - np.radians(lon_search)) / 2
        hav_term = (np.sin(half_dlat) ** 2
                    + np.cos(lat_search_rad) * np.cos(candidate_lats_rad[box_positions])
                    * np.sin(half_dlon) ** 2)

        # argmin keeps the first minimum, i.e. the earliest extract_df row when distances tie
        nearest_position = box_positions[np.argmin(hav_term)]
        extracted_values_list.append(candidate_values[nearest_position])

    # set new column with name
    source_df[new_column_name] = extracted_values_list
    
    return source_df

## Read in Monthly CSVs

In [5]:
def get_monthly_datasets(file_path):
    '''
    Goes into a parent folder and creates a dataset for each csv file sitting directly in that folder.

    Files are read in the order os.walk lists them, which depends on the filesystem and is NOT sorted
    by name. The path of each file is printed as it is read so the caller can see which position of
    the returned list holds which file. A leftover 'Unnamed: 0' index column is dropped when present.

    file_path: file path for the parent folder of the files (with or without a trailing separator)

    Output: list with one dataset per csv file, in read order (empty list if there are no csv files)

    Raises: FileNotFoundError if file_path is not an existing, readable folder
    '''
    # https://www.geeksforgeeks.org/read-multiple-csv-files-into-separate-dataframes-in-python/

    # os.walk yields nothing for a missing/unreadable folder; turn that into a clear error
    top_level = next(os.walk(file_path), None)
    if top_level is None:
        raise FileNotFoundError(f'No readable folder at {file_path}')
    _, _, files = top_level

    dataframes_list = []
    for file_name in files:
        # match on the extension only, so names such as 'x.csv.bak' are not read as data
        if not file_name.endswith('.csv'):
            continue

        # os.path.join works whether or not file_path ends with a separator
        csv_path = os.path.join(file_path, file_name)
        temp_df = pd.read_csv(csv_path)
        print(csv_path)
        if 'Unnamed: 0' in temp_df.columns:
            temp_df.drop(['Unnamed: 0'], axis = 1, inplace = True)
        dataframes_list.append(temp_df)

    return dataframes_list

# Extract Monthly CSV Files to DFs

In [6]:
# read every 2018 AOD csv; the paths printed below show the (unsorted) order of the returned list
AOD_2018 = get_monthly_datasets('../data/AODdata_cleaned/2018/')

../data/AODdata_cleaned/2018/AOD_08.csv
../data/AODdata_cleaned/2018/AOD_09.csv
../data/AODdata_cleaned/2018/AOD_07.csv
../data/AODdata_cleaned/2018/AOD_12.csv
../data/AODdata_cleaned/2018/AOD_06.csv
../data/AODdata_cleaned/2018/AOD_10.csv
../data/AODdata_cleaned/2018/AOD_05.csv
../data/AODdata_cleaned/2018/AOD_11.csv


In [7]:
# assigned in order of get_monthly_datasets output
# NOTE(review): the month in each name was matched by hand to the printed file order; if the folder
# listing order changes, these names will point at the wrong months - confirm before re-running
aod_18_8, aod_18_9, aod_18_7, aod_18_12, aod_18_6, aod_18_10, aod_18_5, aod_18_11 = [pd.DataFrame(each) for each in AOD_2018]

In [8]:
# list of the raw monthly frames, same order as the unpacking above
AOD_2018_list = [aod_18_8, aod_18_9, aod_18_7, aod_18_12, aod_18_6, aod_18_10, aod_18_5, aod_18_11]

## AOD

In [9]:
# read every 2019 AOD csv; the paths printed below show the (unsorted) order of the returned list
AOD_2019 = get_monthly_datasets('../data/AODdata_cleaned/2019/')

../data/AODdata_cleaned/2019/AOD_08.csv
../data/AODdata_cleaned/2019/AOD_09.csv
../data/AODdata_cleaned/2019/AOD_01.csv
../data/AODdata_cleaned/2019/AOD_02.csv
../data/AODdata_cleaned/2019/AOD_03.csv
../data/AODdata_cleaned/2019/AOD_07.csv
../data/AODdata_cleaned/2019/AOD_12.csv
../data/AODdata_cleaned/2019/AOD_06.csv
../data/AODdata_cleaned/2019/AOD_10.csv
../data/AODdata_cleaned/2019/AOD_04.csv
../data/AODdata_cleaned/2019/AOD_05.csv
../data/AODdata_cleaned/2019/AOD_11.csv


In [10]:
# names follow the printed file order of AOD_2019 (matched by hand)
# NOTE(review): re-check this mapping if the folder listing order changes
aod_19_8, aod_19_9, aod_19_1, aod_19_2, aod_19_3, aod_19_7, aod_19_12, aod_19_6, aod_19_10, aod_19_4, aod_19_5, aod_19_11 = [pd.DataFrame(each) for each in AOD_2019]

In [11]:
# list of the raw monthly frames, same order as the unpacking above
AOD_2019_list = [aod_19_8, aod_19_9, aod_19_1, aod_19_2, aod_19_3, aod_19_7, aod_19_12, aod_19_6, aod_19_10, aod_19_4, aod_19_5, aod_19_11]

## NO2

In [15]:
# read every 2019 NO2 csv; the paths printed below show the (unsorted) order of the returned list
NO2_2019 = get_monthly_datasets('../data/NO2data_cleaned/2019/')

../data/NO2data_cleaned/2019/NO2_10.csv
../data/NO2data_cleaned/2019/NO2_04.csv
../data/NO2data_cleaned/2019/NO2_05.csv
../data/NO2data_cleaned/2019/NO2_11.csv
../data/NO2data_cleaned/2019/NO2_07.csv
../data/NO2data_cleaned/2019/NO2_12.csv
../data/NO2data_cleaned/2019/NO2_06.csv
../data/NO2data_cleaned/2019/NO2_02.csv
../data/NO2data_cleaned/2019/NO2_03.csv
../data/NO2data_cleaned/2019/NO2_01.csv
../data/NO2data_cleaned/2019/NO2_08.csv
../data/NO2data_cleaned/2019/NO2_09.csv


In [16]:
# names follow the printed file order of NO2_2019 (matched by hand)
# NOTE(review): re-check this mapping if the folder listing order changes
no2_19_10, no2_19_4, no2_19_5, no2_19_11, no2_19_7, no2_19_12, no2_19_6, no2_19_2, no2_19_3, no2_19_1, no2_19_8, no2_19_9 = [pd.DataFrame(each) for each in NO2_2019]

In [17]:
# list of the raw monthly frames, same order as the unpacking above
NO2_2019_list = [no2_19_10, no2_19_4, no2_19_5, no2_19_11, no2_19_7, no2_19_12, no2_19_6, 
                 no2_19_2, no2_19_3, no2_19_1, no2_19_8, no2_19_9]

## Precipitation

In [21]:
# read every 2019 precipitation csv; the paths printed below show the (unsorted) order of the returned list
P_2019 = get_monthly_datasets('../data/precipdata_cleaned/2019/')

../data/precipdata_cleaned/2019/precip_02.csv
../data/precipdata_cleaned/2019/precip_03.csv
../data/precipdata_cleaned/2019/precip_01.csv
../data/precipdata_cleaned/2019/precip_04.csv
../data/precipdata_cleaned/2019/precip_10.csv
../data/precipdata_cleaned/2019/precip_11.csv
../data/precipdata_cleaned/2019/precip_05.csv
../data/precipdata_cleaned/2019/precip_07.csv
../data/precipdata_cleaned/2019/precip_06.csv
../data/precipdata_cleaned/2019/precip_12.csv
../data/precipdata_cleaned/2019/precip_08.csv
../data/precipdata_cleaned/2019/precip_09.csv


In [22]:
# names follow the printed file order of P_2019 (matched by hand)
# NOTE(review): re-check this mapping if the folder listing order changes
p_19_2, p_19_3, p_19_1, p_19_4, p_19_10, p_19_11, p_19_5, p_19_7, p_19_6, p_19_12, p_19_8, p_19_9 = [pd.DataFrame(each) for each in P_2019]

In [23]:
# list of the raw monthly frames, same order as the unpacking above
P_2019_list = [p_19_2, p_19_3, p_19_1, p_19_4, p_19_10, p_19_11, p_19_5, p_19_7, p_19_6, p_19_12, p_19_8, p_19_9]

## Temperature

In [27]:
# read every 2019 temperature csv; the paths printed below show the (unsorted) order of the returned list
T_2019 = get_monthly_datasets('../data/tempdata_cleaned/2019/')

../data/tempdata_cleaned/2019/temp_09.csv
../data/tempdata_cleaned/2019/temp_08.csv
../data/tempdata_cleaned/2019/temp_01.csv
../data/tempdata_cleaned/2019/temp_03.csv
../data/tempdata_cleaned/2019/temp_02.csv
../data/tempdata_cleaned/2019/temp_12.csv
../data/tempdata_cleaned/2019/temp_06.csv
../data/tempdata_cleaned/2019/temp_07.csv
../data/tempdata_cleaned/2019/temp_05.csv
../data/tempdata_cleaned/2019/temp_11.csv
../data/tempdata_cleaned/2019/temp_10.csv
../data/tempdata_cleaned/2019/temp_04.csv


In [28]:
# names follow the printed file order of T_2019 (matched by hand)
# NOTE(review): re-check this mapping if the folder listing order changes
t_19_9, t_19_8, t_19_1, t_19_3, t_19_2, t_19_12, t_19_6, t_19_7, t_19_5, t_19_11, t_19_10, t_19_4 = [pd.DataFrame(each) for each in T_2019]

In [29]:
# list of the raw monthly frames, same order as the unpacking above
T_2019_list = [t_19_9, t_19_8, t_19_1, t_19_3, t_19_2, t_19_12, t_19_6, t_19_7, t_19_5, t_19_11, t_19_10, t_19_4]

# Extract Lat/Lon for Each Month w/ Variables

In [30]:
# AOD 2018: for each month, copy `test` and add an 'AOD_2018' column holding the AOD_550_AVG of the
# nearest point within +/- 1 of each test lat/lon. The result replaces the raw monthly frame under
# the same name; AOD_2018_list was built earlier and still holds the raw frames.
aod_18_8 = extract_feature('AOD_2018', test, aod_18_8, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [31]:
aod_18_9 = extract_feature('AOD_2018', test, aod_18_9, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [32]:
aod_18_7 = extract_feature('AOD_2018', test, aod_18_7, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [33]:
aod_18_12 = extract_feature('AOD_2018', test, aod_18_12, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [34]:
aod_18_6 = extract_feature('AOD_2018', test, aod_18_6, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [35]:
aod_18_10 = extract_feature('AOD_2018', test, aod_18_10, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [36]:
aod_18_5 = extract_feature('AOD_2018', test, aod_18_5, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [37]:
aod_18_11 = extract_feature('AOD_2018', test, aod_18_11, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

## AOD 2019
Frames processed below, in the order returned by `get_monthly_datasets`: aod_19_8, aod_19_9, aod_19_1, aod_19_2, aod_19_3, aod_19_7, aod_19_12, aod_19_6, aod_19_10, aod_19_4, aod_19_5, aod_19_11

In [38]:
# AOD 2019: for each month, copy `test` and add an 'AOD_2019' column holding the AOD_550_AVG of the
# nearest point within +/- 1 of each test lat/lon. The result replaces the raw monthly frame under
# the same name; AOD_2019_list was built earlier and still holds the raw frames.
aod_19_8 = extract_feature('AOD_2019', test, aod_19_8, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [39]:
aod_19_9 = extract_feature('AOD_2019', test, aod_19_9, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [40]:
aod_19_1 = extract_feature('AOD_2019', test, aod_19_1, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [41]:
aod_19_2 = extract_feature('AOD_2019', test, aod_19_2, 'Latitude', 'Longitude', 
                          'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [42]:
aod_19_3 = extract_feature('AOD_2019', test, aod_19_3, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [43]:
aod_19_7 = extract_feature('AOD_2019', test, aod_19_7, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [44]:
aod_19_12 = extract_feature('AOD_2019', test, aod_19_12, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [45]:
aod_19_6 = extract_feature('AOD_2019', test, aod_19_6, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [46]:
aod_19_10 = extract_feature('AOD_2019', test, aod_19_10, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [47]:
aod_19_4 = extract_feature('AOD_2019', test, aod_19_4, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [48]:
aod_19_5 = extract_feature('AOD_2019', test, aod_19_5, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [49]:
aod_19_11 = extract_feature('AOD_2019', test, aod_19_11, 'Latitude', 'Longitude', 
                         'AOD_550_AVG', lat_range = 1, lon_range = 1)

In [50]:
# NO2 2018: for each month, copy `test` and add a 'NO2_2018' column holding the NO2 value of the
# nearest point within +/- 1 of each test lat/lon; the result is stored under the month's name.
# NOTE(review): the no2_18_* input frames are not assigned in any cell visible in this file
# (the execution counts skip from In [11] to In [15]) - confirm they are loaded before running.
no2_18_10 = extract_feature('NO2_2018', test, no2_18_10, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [51]:
no2_18_5 = extract_feature('NO2_2018', test, no2_18_5, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [52]:
no2_18_11 = extract_feature('NO2_2018', test, no2_18_11, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [53]:
no2_18_7 = extract_feature('NO2_2018', test, no2_18_7, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [54]:
no2_18_12 = extract_feature('NO2_2018', test, no2_18_12, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [55]:
no2_18_6 = extract_feature('NO2_2018', test, no2_18_6, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [56]:
no2_18_8 = extract_feature('NO2_2018', test, no2_18_8, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [57]:
no2_18_9 = extract_feature('NO2_2018', test, no2_18_9, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

## NO2 2019
Frames processed below, in the order returned by `get_monthly_datasets`: no2_19_10, no2_19_4, no2_19_5, no2_19_11, no2_19_7, no2_19_12, no2_19_6, no2_19_2, no2_19_3, no2_19_1, no2_19_8, no2_19_9

In [58]:
# NO2 2019: for each month, copy `test` and add a 'NO2_2019' column holding the NO2 value of the
# nearest point inside the search window around each test lat/lon. The result replaces the raw
# monthly frame under the same name; NO2_2019_list was built earlier and still holds the raw frames.
no2_19_10 = extract_feature('NO2_2019', test, no2_19_10, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [59]:
no2_19_4 = extract_feature('NO2_2019', test, no2_19_4, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [60]:
no2_19_5 = extract_feature('NO2_2019', test, no2_19_5, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [61]:
no2_19_11 = extract_feature('NO2_2019', test, no2_19_11, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [62]:
no2_19_7 = extract_feature('NO2_2019', test, no2_19_7, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [63]:
# NOTE(review): this month uses a wider window (1.5) than the others (1) - presumably to avoid
# rows with no match; confirm this is intentional
no2_19_12 = extract_feature('NO2_2019', test, no2_19_12, 'lat', 'lon', 
                         'NO2', lat_range = 1.5, lon_range = 1.5)

In [64]:
no2_19_6 = extract_feature('NO2_2019', test, no2_19_6, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [65]:
no2_19_2 = extract_feature('NO2_2019', test, no2_19_2, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [66]:
no2_19_3 = extract_feature('NO2_2019', test, no2_19_3, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [67]:
no2_19_1 = extract_feature('NO2_2019', test, no2_19_1, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [68]:
no2_19_8 = extract_feature('NO2_2019', test, no2_19_8, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [69]:
no2_19_9 = extract_feature('NO2_2019', test, no2_19_9, 'lat', 'lon', 
                         'NO2', lat_range = 1, lon_range = 1)

In [70]:
# Precipitation 2018: for each month, copy `test` and add a 'precip_2018' column holding the precip
# value of the nearest point inside the search window; the result is stored under the month's name.
# NOTE(review): the p_18_* input frames are not assigned in any cell visible in this file
# (the execution counts skip from In [17] to In [21]) - confirm they are loaded before running.
# NOTE(review): the first call uses a wider window (1.5) than the others (1) - confirm intentional.
p_18_10 = extract_feature('precip_2018', test, p_18_10, 'lat', 'lon', 
                         'precip', lat_range = 1.5, lon_range = 1.5)

In [71]:
p_18_11 = extract_feature('precip_2018', test, p_18_11, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [72]:
p_18_5 = extract_feature('precip_2018', test, p_18_5, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [73]:
p_18_7 = extract_feature('precip_2018', test, p_18_7, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [74]:
p_18_6 = extract_feature('precip_2018', test, p_18_6, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [75]:
p_18_12 = extract_feature('precip_2018', test, p_18_12, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [76]:
p_18_8 = extract_feature('precip_2018', test, p_18_8, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [77]:
p_18_9 = extract_feature('precip_2018', test, p_18_9, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

## Precipitation 2019
Frames processed below, in the order returned by `get_monthly_datasets`: p_19_2, p_19_3, p_19_1, p_19_4, p_19_10, p_19_11, p_19_5, p_19_7, p_19_6, p_19_12, p_19_8, p_19_9

In [78]:
# Precipitation 2019: for each month, copy `test` and add a 'precip_2019' column holding the precip
# value of the nearest point within +/- 1 of each test lat/lon. The result replaces the raw monthly
# frame under the same name; P_2019_list was built earlier and still holds the raw frames.
p_19_2 = extract_feature('precip_2019', test, p_19_2, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [79]:
p_19_3 = extract_feature('precip_2019', test, p_19_3, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [80]:
p_19_1 = extract_feature('precip_2019', test, p_19_1, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [81]:
p_19_4 = extract_feature('precip_2019', test, p_19_4, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [82]:
p_19_10 = extract_feature('precip_2019', test, p_19_10, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [83]:
p_19_11 = extract_feature('precip_2019', test, p_19_11, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [84]:
p_19_5 = extract_feature('precip_2019', test, p_19_5, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [85]:
p_19_7 = extract_feature('precip_2019', test, p_19_7, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [86]:
p_19_6 = extract_feature('precip_2019', test, p_19_6, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [87]:
p_19_12 = extract_feature('precip_2019', test, p_19_12, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [88]:
p_19_8 = extract_feature('precip_2019', test, p_19_8, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [89]:
p_19_9 = extract_feature('precip_2019', test, p_19_9, 'lat', 'lon', 
                         'precip', lat_range = 1, lon_range = 1)

In [90]:
# Temperature 2018: for each month, copy `test` and add a 'temp_2018' column holding the temp value
# of the nearest point within +/- 1 of each test lat/lon; the result is stored under the month's name.
# NOTE(review): the t_18_* input frames are not assigned in any cell visible in this file
# (the execution counts skip from In [23] to In [27]) - confirm they are loaded before running.
# NOTE(review): cells marked In [None] carry no execution count, i.e. they were not run in this saved session.
t_18_9 = extract_feature('temp_2018', test, t_18_9, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [91]:
t_18_8 = extract_feature('temp_2018', test, t_18_8, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [92]:
t_18_12 = extract_feature('temp_2018', test, t_18_12, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_18_6 = extract_feature('temp_2018', test, t_18_6, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_18_7 = extract_feature('temp_2018', test, t_18_7, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_18_5 = extract_feature('temp_2018', test, t_18_5, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_18_11 = extract_feature('temp_2018', test, t_18_11, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_18_10 = extract_feature('temp_2018', test, t_18_10, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

## Temperature 2019
Frames processed below, in the order returned by `get_monthly_datasets`: t_19_9, t_19_8, t_19_1, t_19_3, t_19_2, t_19_12, t_19_6, t_19_7, t_19_5, t_19_11, t_19_10, t_19_4

In [None]:
# Temperature 2019: for each month, copy `test` and add a 'temp_2019' column holding the temp value
# of the nearest point within +/- 1 of each test lat/lon. The result replaces the raw monthly frame
# under the same name; T_2019_list was built earlier and still holds the raw frames.
# NOTE(review): every cell here is marked In [None], i.e. not run in this saved session.
t_19_9 = extract_feature('temp_2019', test, t_19_9, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_8 = extract_feature('temp_2019', test, t_19_8, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_1 = extract_feature('temp_2019', test, t_19_1, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_3 = extract_feature('temp_2019', test, t_19_3, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_2 = extract_feature('temp_2019', test, t_19_2, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_12 = extract_feature('temp_2019', test, t_19_12, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_6 = extract_feature('temp_2019', test, t_19_6, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_7 = extract_feature('temp_2019', test, t_19_7, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_5 = extract_feature('temp_2019', test, t_19_5, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_11 = extract_feature('temp_2019', test, t_19_11, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_10 = extract_feature('temp_2019', test, t_19_10, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

In [None]:
t_19_4 = extract_feature('temp_2019', test, t_19_4, 'lat', 'lon', 
                         'temp', lat_range = 1, lon_range = 1)

# Combining Datasets

## Merge Variables for Each Month into Monthly Dataframe

In [None]:
# https://www.statology.org/pandas-merge-multiple-dataframes/

In [None]:
def merge_month_frames(frames, key_columns = ('Unnamed: 0', 'lat', 'lon')):
    '''
    Left-merges the per-variable dataframes of one month into a single dataframe.

    Each frame produced by extract_feature is a copy of the same source rows plus one feature column,
    so the merge is keyed on every source column the frames share. Keying on lat/lon alone leaves the
    shared 'Unnamed: 0' column to be suffixed (_x/_y) on each merge; with four frames the third merge
    then produces duplicate column names, which recent pandas versions reject. Including the row id
    in the key also stops repeated coordinates from multiplying rows.

    frames: list of dataframes to merge, left to right
    key_columns: candidate key columns; only those present in every frame are used

    Output: one dataframe holding the key columns plus the remaining columns of every frame
    '''
    keys = [col for col in key_columns if all(col in frame.columns for frame in frames)]
    if not keys:
        raise KeyError(f'none of the key columns {list(key_columns)} is present in every frame')
    return reduce(lambda left, right: pd.merge(left, right, on = keys, how = 'left'), frames)


# one merged frame per 2019 month: AOD, NO2, precipitation and temperature side by side
jan_19 = merge_month_frames([aod_19_1, no2_19_1, p_19_1, t_19_1])
feb_19 = merge_month_frames([aod_19_2, no2_19_2, p_19_2, t_19_2])
mar_19 = merge_month_frames([aod_19_3, no2_19_3, p_19_3, t_19_3])
apr_19 = merge_month_frames([aod_19_4, no2_19_4, p_19_4, t_19_4])
may_19 = merge_month_frames([aod_19_5, no2_19_5, p_19_5, t_19_5])
jun_19 = merge_month_frames([aod_19_6, no2_19_6, p_19_6, t_19_6])
jul_19 = merge_month_frames([aod_19_7, no2_19_7, p_19_7, t_19_7])
aug_19 = merge_month_frames([aod_19_8, no2_19_8, p_19_8, t_19_8])
sep_19 = merge_month_frames([aod_19_9, no2_19_9, p_19_9, t_19_9])
oct_19 = merge_month_frames([aod_19_10, no2_19_10, p_19_10, t_19_10])
nov_19 = merge_month_frames([aod_19_11, no2_19_11, p_19_11, t_19_11])
dec_19 = merge_month_frames([aod_19_12, no2_19_12, p_19_12, t_19_12])

## Create List of Monthly Dataframes for Each Year

In [None]:
# merged 2019 frames in calendar order (January first)
y_19 = [jan_19, feb_19, mar_19, apr_19, may_19, jun_19, jul_19, aug_19, sep_19, oct_19, nov_19, dec_19]

## Check Nulls

In [None]:
# print, for every monthly frame, the number of missing values in each column
for month in y_19:
    print(month.isnull().sum())

# Save to csv

In [None]:
# saving is currently commented out; uncomment to write one csv per frame of y_19,
# numbered from 1 in list order
# i = 1
# for month in y_19:
    # month.to_csv(f'../data/monthly_test_data/result_1k/{str(i)}_2019.csv', index=False)
    # i += 1