### Importing Dependencies

In [1]:
# Dependencies
import pandas as pd
import numpy as np

### Creating a DataFrame from the CSV file

In [2]:
# Load the raw monthly rent table from the CSV file and preview the first rows
rent_df = pd.read_csv(filepath_or_buffer="rent_raw.csv")
rent_df.head(n=5)

Unnamed: 0,Region Name,Region Type,Data Type,Nov-10,Dec-10,Jan-11,Feb-11,Mar-11,Apr-11,May-11,...,Dec-17,Jan-18,Feb-18,Mar-18,Apr-18,May-18,Jun-18,Jul-18,Aug-18,Sep-18
0,Bayview,neighborhood,All Homes,3183,3179,3078,2917,2706,2571,2480,...,3713,3663,3624,3602,3579,3558,3535,3519,3508,3502
1,Bernal Heights,neighborhood,All Homes,3155,3146,3110,3013,2882,2798,2763,...,4490,4470,4457,4448,4444,4440,4443,4451,4459,4465
2,Buena Vista,neighborhood,All Homes,3551,3604,3636,3601,3514,3413,3362,...,5207,5155,5139,5158,5187,5203,5201,5198,5203,5212
3,Corona Heights,neighborhood,All Homes,3504,3544,3551,3500,3406,3316,3261,...,5082,5070,5057,5060,5069,5082,5086,5087,5087,5083
4,Cow Hollow,neighborhood,All Homes,4243,4401,4438,4412,4324,4319,4370,...,6050,6061,6052,6034,6009,5979,5964,5972,6013,6065


### Cleaning Data and Creating new DataFrame

In [3]:
# Discard the two descriptive columns that are not needed for the yearly totals
rent_df = rent_df.drop(columns=["Region Type", "Data Type"])

In [4]:
# Relabel "Region Name" as "Neighborhood" and preview the result
rent_df = rent_df.rename({"Region Name": "Neighborhood"}, axis="columns")
rent_df.head(n=5)

Unnamed: 0,Neighborhood,Nov-10,Dec-10,Jan-11,Feb-11,Mar-11,Apr-11,May-11,Jun-11,Jul-11,...,Dec-17,Jan-18,Feb-18,Mar-18,Apr-18,May-18,Jun-18,Jul-18,Aug-18,Sep-18
0,Bayview,3183,3179,3078,2917,2706,2571,2480,2450,2437,...,3713,3663,3624,3602,3579,3558,3535,3519,3508,3502
1,Bernal Heights,3155,3146,3110,3013,2882,2798,2763,2767,2780,...,4490,4470,4457,4448,4444,4440,4443,4451,4459,4465
2,Buena Vista,3551,3604,3636,3601,3514,3413,3362,3371,3428,...,5207,5155,5139,5158,5187,5203,5201,5198,5203,5212
3,Corona Heights,3504,3544,3551,3500,3406,3316,3261,3265,3307,...,5082,5070,5057,5060,5069,5082,5086,5087,5087,5083
4,Cow Hollow,4243,4401,4438,4412,4324,4319,4370,4442,4495,...,6050,6061,6052,6034,6009,5979,5964,5972,6013,6065


In [5]:
# Pull the neighborhood names out as a Series (one entry per row of rent_df)
neighborhood = rent_df.loc[:, "Neighborhood"]

In [6]:
# Function that returns the sum per row for one year
def totals(df, str):
    """Return the per-row sum of the monthly columns that belong to one year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose monthly columns are labelled "Mon-YY" (e.g. "Jan-11").
    str : str
        Last two digits of the year, e.g. "11". The name shadows the builtin
        inside this function; it is kept so existing callers keep working.

    Returns
    -------
    pandas.Series
        One total per row of ``df``.
    """
    # Anchor on the "-YY" suffix so only that year's month columns are selected;
    # a bare substring match would also pick up any other label that merely
    # contains the same digits.
    year = df.filter(regex=f"-{str}$", axis=1)

    return year.sum(axis=1)  # sum across the selected columns, row by row

In [7]:
# Two-digit suffixes of the years that have data for all 12 months
year_list = list(np.arange(11, 18))  # NumPy integers 11, 12, ..., 17
year_list_str = list(map(str, year_list))  # the same years as strings

year_list_str

['11', '12', '13', '14', '15', '16', '17']

In [8]:
# Build a list holding one Series of per-neighbourhood totals for each year suffix
yearly_rate = list(map(lambda suffix: totals(rent_df, suffix), year_list_str))

In [9]:
# Column labels for the new frame: "Neighborhood" first, then every
# two-digit year suffix expanded to a four-digit year by prefixing "20"
keys = ["Neighborhood"] + ["20" + suffix for suffix in year_list_str]
keys

['Neighborhood', '2011', '2012', '2013', '2014', '2015', '2016', '2017']

In [10]:
# Column values for the new frame: the neighbourhood names followed by one
# Series of totals per year.
# A new list is built on purpose: `values = yearly_rate` would only alias the
# same list, so inserting into it would also change yearly_rate and prepend
# another copy of the names every time this cell is re-run.
values = [neighborhood, *yearly_rate]

In [11]:
# Pair every column label with its values and assemble the DataFrame of
# yearly rates (2011 to 2017) for the SF neighbourhoods
year_rent_df = pd.DataFrame({label: column for label, column in zip(keys, values)})
year_rent_df.head()

Unnamed: 0,Neighborhood,2011,2012,2013,2014,2015,2016,2017
0,Bayview,30723,28821,30433,35338,42870,45681,45747
1,Bernal Heights,34471,35739,38924,43654,53977,54833,53741
2,Buena Vista,42407,45678,49364,53889,61646,65690,61917
3,Corona Heights,41051,44269,48263,52768,61781,64072,59849
4,Cow Hollow,52856,52816,56455,62256,75947,78557,71952


In [12]:
# Calculate the average yearly rent per neighborhood
# and add a 'City' column to specify the location of each neighborhood

# Average only the four-digit year columns. Taking the mean of the whole frame
# would also involve the text columns (Neighborhood, City); older pandas
# silently skipped them, while pandas 2.x raises a TypeError instead.
year_columns = year_rent_df.filter(regex=r"^\d{4}$")
year_rent_df["Avg Price Per Year"] = year_columns.mean(axis=1)

# A scalar assignment is broadcast to every row, so no per-row loop is needed.
year_rent_df["City"] = "San Francisco"
year_rent_df.head()

Unnamed: 0,Neighborhood,2011,2012,2013,2014,2015,2016,2017,Avg Price Per Year,City
0,Bayview,30723,28821,30433,35338,42870,45681,45747,37087.571429,San Francisco
1,Bernal Heights,34471,35739,38924,43654,53977,54833,53741,45048.428571,San Francisco
2,Buena Vista,42407,45678,49364,53889,61646,65690,61917,54370.142857,San Francisco
3,Corona Heights,41051,44269,48263,52768,61781,64072,59849,53150.428571,San Francisco
4,Cow Hollow,52856,52816,56455,62256,75947,78557,71952,64405.571429,San Francisco


### Adding geolocation information for each SF neighbourhood

In [13]:
#Importing dependencies to make a request for Lat and Lng
import requests
import json
from config import gkey

In [14]:
# Create the geolocation columns as missing-value floats. An empty-string
# placeholder would turn them into text (object) columns and leave "" behind
# for any neighborhood whose lookup does not fill in a coordinate.
year_rent_df["Lat"] = np.nan
year_rent_df["Lng"] = np.nan

In [15]:
# Request settings for the Google Geocoding API; the address is filled in per row
base_url = "https://maps.googleapis.com/maps/api/geocode/json"
params = {"key": gkey}

# Loop through year_rent_df and run a lat/lng search for each neighborhood
for index, row in year_rent_df.iterrows():
    # Use a loop-local name: reusing `neighborhood` here would overwrite the
    # Series of neighborhood names created earlier in the notebook.
    neighborhood_name = row["Neighborhood"]
    city = row["City"]

    # update address key value
    params["address"] = f"{neighborhood_name},{city}"

    # make request; the timeout keeps one stalled call from hanging the cell,
    # and raise_for_status surfaces HTTP errors instead of parsing an error page
    response = requests.get(base_url, params=params, timeout=10)
    response.raise_for_status()

    # convert to json
    lat_lng = response.json()

    # The response may contain no matches, so check before indexing the first one
    results = lat_lng.get("results") or []
    if not results:
        print(f"No geocoding result for {neighborhood_name} (status: {lat_lng.get('status')}); skipping.")
        continue

    # insert the coordinates of the first match into the assigned columns
    location = results[0]["geometry"]["location"]
    year_rent_df.loc[index, "Lat"] = location["lat"]
    year_rent_df.loc[index, "Lng"] = location["lng"]

In [16]:
# Preview the first rows, now including the Lat and Lng columns
year_rent_df.head()

Unnamed: 0,Neighborhood,2011,2012,2013,2014,2015,2016,2017,Avg Price Per Year,City,Lat,Lng
0,Bayview,30723,28821,30433,35338,42870,45681,45747,37087.571429,San Francisco,37.7304,-122.384
1,Bernal Heights,34471,35739,38924,43654,53977,54833,53741,45048.428571,San Francisco,37.7389,-122.415
2,Buena Vista,42407,45678,49364,53889,61646,65690,61917,54370.142857,San Francisco,37.8065,-122.421
3,Corona Heights,41051,44269,48263,52768,61781,64072,59849,53150.428571,San Francisco,37.7618,-122.443
4,Cow Hollow,52856,52816,56455,62256,75947,78557,71952,64405.571429,San Francisco,37.798,-122.44


In [17]:
# Write the finished table to the Data folder as a UTF-8 CSV
# (the row index is written too, as the first, unnamed column)
year_rent_df.to_csv(path_or_buf="../Data/yearly_rent.csv", encoding="utf-8")