## MAIN PROGRAM FILE

### Import dependencies

In [1]:
import pandas as pd
import os
from config import gkey
import gmaps
from scipy.stats import linregress
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import requests
from pprint import pprint
import json

#### Calling the CSV files and combining the data on both CSVs for State = Delaware

In [2]:
###CALLLING OUT CSV FILE LOCATIONS
csv_path_1 = os.path.join ("..","Resources","Census_Data_2015.csv")
csv_path_2 = os.path.join("..", "Resources","Tract_lat_lon.csv")

### READING CSV DATA INTO DATA FRAMES
Maindata_1_df = pd.read_csv(csv_path_1)
lat_lon_tract_df = pd.read_csv(csv_path_2)

###CHECKING DATA FRAME HEADS TO ENSURE DATA IS BEING CORRECTLY READ
Maindata_1_df.head()
lat_lon_tract_df.head()

###MERGING TWO DATA FRAMES TO GET FINAL DATAFRAME WITH CENSUS AND GEOGROPHIC INFO COMBINED
# THE INNER MERGE WILL ONLY SHOW THOSE ROWS THAT HAVE INFO IN BOTH CSV FILES. IN DOING THIS WE LOST DATA OF 25 TRACT WHICH DID NOT HAVE LAT LONGS

Combined_df = pd.merge(Maindata_1_df, lat_lon_tract_df, on='CENSUSTRACT', how = 'inner')

###CHECKING DATA FRAME SHAPE TO SEE HOW MANY ROWS AND COLUMNS ARE THERE. 
###THIS HV_final_df IS THE DATA FRAME THAT CAN BE USED BY ALL TEAM MEMBERS FOR THEIR INDIVIDUAL PROCESSING


###OUTPUT THE FINAL DATA FRAME WITH LAT LON + CENSUS DATA INTO CSV FILE IN OUTPUT FOLDER
###HV_final_df.to_csv(os.path.join("..","Output", "Census_data_and_lat_lon.csv"), encoding = "utf-8", index = False)
Combined_df.head()

## DATA FRAME FOR ALL DATA FOR A SINGLE STATE TO REDUCE COMPUTATIONAL TIME FOR ETL PROJECT##
## SELECTED BELOW IS DATA FOR DELAWARE  ##


Combined_State_df = Combined_df[Combined_df ["State"] == 'Delaware']
Combined_State_df.head()


Unnamed: 0,CENSUSTRACT,State,County,Urban,POP2010,OHU2010,GroupQuartersFlag,NUMGQTRS,PCTGQTRS,LILATracts_1And10,...,TractOMultir,TractHispanic,TractHUNV,TractSNAP,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,LAT,LON
13674,10001040100,Delaware,Kent,0,6541,2325,0,0,0.0,0,...,183,247,39,418,124745855.0,0,48.165,0.0,39.237284,-75.694741
13675,10001040201,Delaware,Kent,0,5041,1849,0,2,0.000397,0,...,290,312,160,322,9730214.0,37235,3.757,0.014,39.290841,-75.637508
13676,10001040202,Delaware,Kent,1,12763,4451,0,340,0.026639,0,...,568,593,121,249,31927916.0,700605,12.327,0.271,39.263964,-75.611123
13677,10001040203,Delaware,Kent,0,5017,1877,0,0,0.0,0,...,269,261,146,289,59860927.0,1054612,23.112,0.407,39.285868,-75.550836
13678,10001040501,Delaware,Kent,1,4923,1910,0,0,0.0,0,...,392,413,142,345,10394085.0,0,4.013,0.0,39.199584,-75.543902


#### Copied the tables to a new table with the columns we need

In [3]:
sv_new_Combined_State_df = Combined_State_df[['CENSUSTRACT', 'State', 'County', 'POP2010', 'OHU2010', 'MedianFamilyIncome', 'TractHUNV', 'PovertyRate', 'LAT', 'LON']].copy()
sv_new_Combined_State_df.head()

Unnamed: 0,CENSUSTRACT,State,County,POP2010,OHU2010,MedianFamilyIncome,TractHUNV,PovertyRate,LAT,LON
13674,10001040100,Delaware,Kent,6541,2325,71188,39,10.7,39.237284,-75.694741
13675,10001040201,Delaware,Kent,5041,1849,54826,160,8.7,39.290841,-75.637508
13676,10001040202,Delaware,Kent,12763,4451,73155,121,5.3,39.263964,-75.611123
13677,10001040203,Delaware,Kent,5017,1877,69273,146,11.9,39.285868,-75.550836
13678,10001040501,Delaware,Kent,4923,1910,57891,142,16.9,39.199584,-75.543902


#### Calling on google places API for getting car dealer infora

In [4]:
## GOOGLE PLACES API CALL OUT FOR GETTING DEALER INFO

car_dealers = []

# PARAMS DICT TO UPDATE ALONG EACH ROW
params = {
    "radius": 50000,
    "types": "car_dealer",
    "key": gkey
}

# USING LAT AND LON FROM THE STATE DATA FRAME ABOVE#
for index, row in sv_new_Combined_State_df.iterrows():
    
    # GET LAT & LON FROM COMBINED_TEXAS_DF
    lat = row["LAT"]
    lng = row["LON"]
   
    # change location each iteration while leaving original params in place
    params["location"] = f"{lat},{lng}"

    # Use the search term: "International Airport" and our lat/lng
    base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    # make request and print url
    dealers_info = requests.get(base_url, params=params)


    # convert to json
    dealers_response = dealers_info.json()
  
    try:
        car_dealers.append(dealers_response["results"][0]["name"])
    except:
        car_dealers.append("Nearest car dealer not found")

# Dataframe with nearest car dealer
sv_new_Combined_State_df["Nearest Car Dealer"] = car_dealers
sv_new_Combined_State_df  
    
    

Unnamed: 0,CENSUSTRACT,State,County,POP2010,OHU2010,MedianFamilyIncome,TractHUNV,PovertyRate,LAT,LON,Nearest Car Dealer
13674,10001040100,Delaware,Kent,6541,2325,71188,39,10.7,39.237284,-75.694741,Winner Ford
13675,10001040201,Delaware,Kent,5041,1849,54826,160,8.7,39.290841,-75.637508,Martin Dealerships
13676,10001040202,Delaware,Kent,12763,4451,73155,121,5.3,39.263964,-75.611123,New Jersey Motorsports Park
13677,10001040203,Delaware,Kent,5017,1877,69273,146,11.9,39.285868,-75.550836,New Jersey Motorsports Park
13678,10001040501,Delaware,Kent,4923,1910,57891,142,16.9,39.199584,-75.543902,New Jersey Motorsports Park
...,...,...,...,...,...,...,...,...,...,...,...
13887,10005051702,Delaware,Sussex,5613,2112,67273,88,13.7,38.490785,-75.450919,Preston Autoplex
13888,10005051801,Delaware,Sussex,4882,1921,65265,152,12.7,38.555469,-75.657086,Preston Autoplex
13889,10005051802,Delaware,Sussex,4190,1509,47465,106,23.0,38.538569,-75.570692,Preston Autoplex
13890,10005051900,Delaware,Sussex,4565,1685,66050,62,13.1,38.488638,-75.625854,Preston Autoplex
