In [150]:
# The purpose of this project was to find which cities would 
# be the best to live in according to a weighted mean created from
# certain statistics. This was a rather simple, introductory
# project to get used to using Pandas and numpy in the
# Jupyter Notebook.

In [1]:
import os

import cufflinks as cf
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.tools as tls
import seaborn as sb

In [2]:
# LIST OF ALL CREATED TABLES #

# cities_table
# sqft_price_table
# house_price_table
# most_affordable_table
# sqft_price_table
# life_quality_table
# safety_table
# pollution_table
# climate_table
# stats_table
# corr_table
# weighted_table
# sorted_weighted_table

In [4]:
# Build the DataFrame of cities of interest, one list of values per statistic.
# Every list holds one entry per city, in the same order as 'City-State'.
city_stats = {
    'City-State': [
        'Phoenix-AZ', 'Los Angeles-CA', 'San Diego-CA', 'San Francisco-CA', 'Sacramento-CA',
        'Denver-CO', 'Chicago-IL', 'Detroit-MI', 'Portland-OR', 'Austin-TX',
        'Dallas-TX', 'Houston-TX', 'San Antonio-TX', 'Salt Lake City-UT', 'Seattle-WA',
    ],
    'Avg House Price by $X,000s': [253, 644, 584, 949, 399, 398, 219, 154, 389, 308, 295, 227, 184, 334, 490],
    'Price Per Square Foot by $': [156, 428, 374, 497, 240, 264, 164, 132, 231, 211, 156, 137, 123, 203, 275],
    'Cost of Living Index': [64.29, 77.51, 77.24, 96.85, 75.52, 72.00, 77.31, 62.75, 74.16, 70.47, 64.71, 62.17, 58.09, 62.98, 85.48],
    'Quality of Life Index': [161.05, 150.53, 198.75, 171.99, 191.84, 170.96, 160.80, 134.20, 189.37, 193.35, 188.97, 176.47, 194.13, 180.80, 186.39],
    'Purchasing Power Index': [146.15, 114.51, 144.91, 125.95, 145.88, 130.34, 133.70, 97.41, 126.03, 151.25, 153.42, 155.88, 150.96, 150.76, 140.00],
    'Safety Index': [45.22, 51.39, 66.75, 50.14, 51.52, 59.32, 34.77, 28.81, 55.00, 65.10, 54.78, 40.92, 57.70, 74.80, 55.82],
    'Pollution Index': [60.09, 59.10, 33.53, 33.89, 39.91, 41.52, 38.32, 54.82, 26.02, 38.12, 38.95, 51.37, 42.30, 59.01, 29.70],
    'Climate Index': [53.76, 95.61, 97.08, 97.34, 90.64, 56.28, 66.11, 66.03, 89.56, 82.04, 82.58, 86.11, 81.17, 66.52, 91.78],
}

# Column order used for display: city label first, then the statistics.
display_order = [
    'City-State',
    'Quality of Life Index',
    'Purchasing Power Index',
    'Cost of Living Index',
    'Avg House Price by $X,000s',
    'Price Per Square Foot by $',
    'Safety Index',
    'Pollution Index',
    'Climate Index',
]

cities_table = pd.DataFrame(city_stats, columns=display_order)
cities_table

Unnamed: 0,City-State,Quality of Life Index,Purchasing Power Index,Cost of Living Index,"Avg House Price by $X,000s",Price Per Square Foot by $,Safety Index,Pollution Index,Climate Index
0,Phoenix-AZ,161.05,146.15,64.29,253,156,45.22,60.09,53.76
1,Los Angeles-CA,150.53,114.51,77.51,644,428,51.39,59.1,95.61
2,San Diego-CA,198.75,144.91,77.24,584,374,66.75,33.53,97.08
3,San Francisco-CA,171.99,125.95,96.85,949,497,50.14,33.89,97.34
4,Sacramento-CA,191.84,145.88,75.52,399,240,51.52,39.91,90.64
5,Denver-CO,170.96,130.34,72.0,398,264,59.32,41.52,56.28
6,Chicago-IL,160.8,133.7,77.31,219,164,34.77,38.32,66.11
7,Detroit-MI,134.2,97.41,62.75,154,132,28.81,54.82,66.03
8,Portland-OR,189.37,126.03,74.16,389,231,55.0,26.02,89.56
9,Austin-TX,193.35,151.25,70.47,308,211,65.1,38.12,82.04


In [8]:
# Uncomment to see Raw Table

# # SQFT PRICE
# # Sort by ascending Price Per Square Foot cost to get a
# # look at cities with most affordable land
# sqft_price_table = cities_table[['City-State', 'Price Per Square Foot by $']]
# sqft_price_table = sqft_price_table.sort_values(ascending = True, by=['Price Per Square Foot by $'])
# sqft_price_table



In [5]:
# Visualization for sqft: bar chart of the price per square foot in each city.

# Plain (unstyled) trace list; the figure below uses the coloured trace instead.
sqft_plotly_graph = [go.Bar(x = cities_table['City-State'], y = cities_table['Price Per Square Foot by $'])]

# One colour entry per bar, all black. Built from the table length so the list
# cannot drift out of sync with the data (the hand-written list contained a
# malformed entry, 'rgba(0,0,0,1))').
sqft_color_theme = dict(color=['rgba(0,0,0,1)'] * len(cities_table))

sqft_colored_scores = go.Bar(x = cities_table['City-State'],
                             y = cities_table['Price Per Square Foot by $'],
                             marker = sqft_color_theme)
data = [sqft_colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# previously they lived in a dict that was overwritten before use.
layout = go.Layout(title='Sqft Price per City',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='Price per sqft'))
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Sqft Price (Colored)')



In [6]:
# Uncomment to see Raw Table

# # HOUSE PRICES

# # Sort by ascending house price to get a
# # look at cities with most affordable housing
# house_price_table = cities_table[['City-State', 'Avg House Price by $X,000s']]
# house_price_table = house_price_table.sort_values(ascending = True, by=['Avg House Price by $X,000s'])
# house_price_table

In [7]:
# Visualization for house prices: bar chart of the average house price in each city.

# Plain (unstyled) trace list; the figure below uses the coloured trace instead.
house_plotly_graph = [go.Bar(x = cities_table['City-State'], y = cities_table['Avg House Price by $X,000s'])]

# One colour entry per bar, all grey.
house_color_theme = dict(color=['rgba(128,128,128,1)'] * len(cities_table))

house_colored_scores = go.Bar(x = cities_table['City-State'],
                              y = cities_table['Avg House Price by $X,000s'],
                              marker = house_color_theme)
data = [house_colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# previously they lived in a dict that was overwritten before use.
layout = go.Layout(title='House Price per City',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='Avg House Price by $X,000s'))
fig = go.Figure(data=data, layout=layout)


py.iplot(fig, filename='House price (Colored)')

In [8]:
#Uncomment for Raw Table

# COST OF LIVING 

# # Sort by ascending cost of living price to get a
# # look at cities with most affordable living
# most_affordable_table = cities_table[['City-State', 'Cost of Living Index']]
# most_affordable_table = most_affordable_table.sort_values(ascending = True, by=['Cost of Living Index'])
# most_affordable_table




In [9]:
# Visualization for cost of living: bar chart of the cost of living index in each city.

# Plain (unstyled) trace list; the figure below uses the coloured trace instead.
cost_plotly_graph = [go.Bar(x = cities_table['City-State'], y = cities_table['Cost of Living Index'])]

# One colour entry per bar, all red.
cost_color_theme = dict(color=['rgba(255,0,0,1)'] * len(cities_table))

cost_colored_scores = go.Bar(x = cities_table['City-State'],
                             y = cities_table['Cost of Living Index'],
                             marker = cost_color_theme)
data = [cost_colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# previously they lived in a dict that was overwritten before use.
layout = go.Layout(title='Cost of Living by city',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='Cost of Living Index'))
fig = go.Figure(data=data, layout=layout)


py.iplot(fig, filename='Cost of Living (Colored)')

In [10]:
# Uncomment for Raw Table

# # PURCHASING POWER 

# # Sort by descending PPI to get a
# # second factor for Cost Of Living
# sqft_price_table = cities_table[['City-State', 'Purchasing Power Index']]
# sqft_price_table = sqft_price_table.sort_values(ascending = False, by=['Purchasing Power Index'])
# sqft_price_table

In [11]:
# Visualization for PPI: bar chart of the purchasing power index in each city.

# Plain (unstyled) trace list; the figure below uses the coloured trace instead.
ppi_plotly_graph = [go.Bar(x = cities_table['City-State'], y = cities_table['Purchasing Power Index'])]

# One colour entry per bar, all yellow.
ppi_color_theme = dict(color=['rgba(255,255,0,1)'] * len(cities_table))

ppi_colored_scores = go.Bar(x = cities_table['City-State'],
                            y = cities_table['Purchasing Power Index'],
                            marker = ppi_color_theme)
data = [ppi_colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# previously they lived in a dict that was overwritten before use.
layout = go.Layout(title='Purchasing Power Index',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='PPI'))
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Purchasing Power (Colored)')

In [12]:
# # QUALITY OF LIFE

# # Sort by descending quality of life index to get a
# # look at cities with best quality of life
# life_quality_table = cities_table[['City-State', 'Quality of Life Index']]
# life_quality_table = life_quality_table.sort_values(ascending = False, by=['Quality of Life Index'])
# life_quality_table

In [13]:
# Visualization for QOL: bar chart of the quality of life index in each city.

# Plain (unstyled) trace list; the figure below uses the coloured trace instead.
qol_plotly_graph = [go.Bar(x = cities_table['City-State'], y = cities_table['Quality of Life Index'])]

# One colour entry per bar, all green.
qol_color_theme = dict(color=['rgba(0,255,0,1)'] * len(cities_table))

qol_colored_scores = go.Bar(x = cities_table['City-State'],
                            y = cities_table['Quality of Life Index'],
                            marker = qol_color_theme)
data = [qol_colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# previously they lived in a dict that was overwritten before use.
layout = go.Layout(title='Quality of Life ',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='QOL'))
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Quality of Life (Colored)')

In [14]:
# # SAFETY

# # Sort by descending safety index to get a
# # look at the safest cities
# safety_table = cities_table[['City-State', 'Safety Index']]
# safety_table = safety_table.sort_values(ascending = False, by=['Safety Index'])
# safety_table

In [15]:
# Visualization for Safety: bar chart of the safety index in each city.

# Plain (unstyled) trace list; the figure below uses the coloured trace instead.
safety_plotly_graph = [go.Bar(x = cities_table['City-State'], y = cities_table['Safety Index'])]

# One colour entry per bar, all cyan.
safety_color_theme = dict(color=['rgba(0,255,255,1)'] * len(cities_table))

safety_colored_scores = go.Bar(x = cities_table['City-State'],
                               y = cities_table['Safety Index'],
                               marker = safety_color_theme)
data = [safety_colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# previously they lived in a dict that was overwritten before use.
layout = go.Layout(title='Safety Index',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='Safety Index'))
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Safety Index (Colored)')

In [16]:
# # POLLUTION

# # Sort by ascending Pollution index to get a
# # look at the least polluted cities
# pollution_table = cities_table[['City-State', 'Pollution Index']]
# pollution_table = pollution_table.sort_values(ascending = True, by=['Pollution Index'])
# pollution_table

In [17]:
# Visualization for Pollution: bar chart of the pollution index in each city.

# Plain (unstyled) trace list; the figure below uses the coloured trace instead.
pollution_plotly_graph = [go.Bar(x = cities_table['City-State'], y = cities_table['Pollution Index'])]

# One colour entry per bar, all blue.
pollution_color_theme = dict(color=['rgba(0,0,255,1)'] * len(cities_table))

pollution_colored_scores = go.Bar(x = cities_table['City-State'],
                                  y = cities_table['Pollution Index'],
                                  marker = pollution_color_theme)
data = [pollution_colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# previously they lived in a dict that was overwritten before use.
layout = go.Layout(title='Pollution Index',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='Pollution Index'))
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Pollution Index (Colored)')

In [18]:
# # CLIMATE

# # Sort by descending climate index to get a
# # look at cities with the best weather
# climate_table = cities_table[['City-State', 'Climate Index']]
# climate_table = climate_table.sort_values(ascending = False, by=['Climate Index'])
# climate_table

In [19]:
# Visualization for Climate: bar chart of the climate index in each city.

# Plain (unstyled) trace list; the figure below uses the coloured trace instead.
climate_plotly_graph = [go.Bar(x = cities_table['City-State'], y = cities_table['Climate Index'])]

# One colour entry per bar, all magenta.
climate_color_theme = dict(color=['rgba(255,0,255,1)'] * len(cities_table))

climate_colored_scores = go.Bar(x = cities_table['City-State'],
                                y = cities_table['Climate Index'],
                                marker = climate_color_theme)
data = [climate_colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# previously they lived in a dict that was overwritten before use.
layout = go.Layout(title='Climate Index',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='Climate Index'))
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Climate Index (Colored)')

In [20]:
# Group the rows of cities_table by their 'City-State' label. At this point
# weighted_table is a GroupBy object, not yet a table of scores.
weighted_table = cities_table.groupby('City-State')
def wavg(group):
    """Combine the statistics in ``group`` into a single weighted score.

    Quality of life is weighted +0.20; purchasing power, safety and climate
    are weighted +0.05 each; cost of living, average house price and price
    per square foot are weighted -0.20 each; pollution is weighted -0.05.
    The signed sum is shifted by 500, divided by 500 and multiplied by 100.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows containing the eight statistic columns read below.

    Returns
    -------
    pandas.Series
        One score per row of ``group``.
    """
    quality = group['Quality of Life Index']
    purchasing = group['Purchasing Power Index']
    living_cost = group['Cost of Living Index']
    house_price = group['Avg House Price by $X,000s']
    sqft_price = group['Price Per Square Foot by $']
    safety = group['Safety Index']
    pollution = group['Pollution Index']
    climate = group['Climate Index']

    # Terms are added in the same left-to-right order as the original formula
    # so the floating-point result is unchanged.
    weighted_sum = (0.20 * quality
                    + 0.05 * purchasing
                    - 0.20 * living_cost
                    - 0.20 * house_price
                    - 0.20 * sqft_price
                    + 0.05 * safety
                    - 0.05 * pollution
                    + 0.05 * climate)

    shifted = weighted_sum + 500
    return (shifted / 500) * 100
    
# Run wavg on every city group. The name weighted_table is rebound here from
# the GroupBy object to the resulting scores, which are then displayed.
weighted_table = weighted_table.apply(wavg)
weighted_table


City-State           
Austin-TX          9     86.7579
Chicago-IL         6     89.9822
Dallas-TX          10    89.4487
Denver-CO          5     79.5226
Detroit-MI         7     92.7923
Houston-TX         11    92.3274
Los Angeles-CA     1     62.0649
Phoenix-AZ         0     89.3608
Portland-OR        8     82.2541
Sacramento-CA      4     81.5741
Salt Lake City-UT  13    85.5635
San Antonio-TX     12    95.6369
San Diego-CA       2     69.2925
San Francisco-CA   3     47.5610
Seattle-WA         14    76.0154
dtype: float64

In [21]:
# Order the scores from lowest to highest (ascending is the sort_values default).
sorted_weighted_table = weighted_table.sort_values()
sorted_weighted_table

City-State           
San Francisco-CA   3     47.5610
Los Angeles-CA     1     62.0649
San Diego-CA       2     69.2925
Seattle-WA         14    76.0154
Denver-CO          5     79.5226
Sacramento-CA      4     81.5741
Portland-OR        8     82.2541
Salt Lake City-UT  13    85.5635
Austin-TX          9     86.7579
Phoenix-AZ         0     89.3608
Dallas-TX          10    89.4487
Chicago-IL         6     89.9822
Houston-TX         11    92.3274
Detroit-MI         7     92.7923
San Antonio-TX     12    95.6369
dtype: float64

In [22]:
# Horizontal bar chart of the sorted scores. A title and an axis label are
# set so the figure can be read on its own.
score_axes = sorted_weighted_table.plot(kind = 'barh', color = 'c', title = 'Weighted City Scores')
score_axes.set_xlabel('Score')
score_axes


<matplotlib.axes._subplots.AxesSubplot at 0x23143661710>

In [23]:
# Load the city scores from graph.csv and display them.
# NOTE(review): graph.csv is not written anywhere in the visible notebook;
# presumably it was exported from sorted_weighted_table — confirm, otherwise
# a fresh "Restart & Run All" cannot reproduce this cell.
scores_plotly = pd.read_csv('graph.csv')
scores_plotly

Unnamed: 0,City,Score
0,San Francisco-CA,47.561
1,Los Angeles-CA,62.0649
2,San Diego-CA,69.2925
3,Seattle-WA,76.0154
4,Denver-CO,79.5226
5,Sacramento-CA,81.5741
6,Portland-OR,82.2541
7,Salt Lake City-UT,85.5635
8,Austin-TX,86.7579
9,Phoenix-AZ,89.3608


In [24]:
# Display the scores from highest to lowest; the result is not assigned, so
# scores_plotly itself keeps its original order.
scores_plotly.sort_values(by = 'Score', ascending = False)

Unnamed: 0,City,Score
14,San Antonio-TX,95.6369
13,Detroit-MI,92.7923
12,Houston-TX,92.3274
11,Chicago-IL,89.9822
10,Dallas-TX,89.4487
9,Phoenix-AZ,89.3608
8,Austin-TX,86.7579
7,Salt Lake City-UT,85.5635
6,Portland-OR,82.2541
5,Sacramento-CA,81.5741


In [25]:
# Wrap the city/score bar trace in a one-element list and print it for inspection.
score_trace = go.Bar(x = scores_plotly['City'], y = scores_plotly['Score'])
scores_plotly_graph = [score_trace]
print(scores_plotly_graph)

[{'type': 'bar', 'x': 0      San Francisco-CA 
1        Los Angeles-CA 
2          San Diego-CA 
3            Seattle-WA 
4             Denver-CO 
5         Sacramento-CA 
6           Portland-OR 
7     Salt Lake City-UT 
8             Austin-TX 
9            Phoenix-AZ 
10            Dallas-TX 
11           Chicago-IL 
12           Houston-TX 
13           Detroit-MI 
14       San Antonio-TX 
Name: City, dtype: object, 'y': 0     47.5610
1     62.0649
2     69.2925
3     76.0154
4     79.5226
5     81.5741
6     82.2541
7     85.5635
8     86.7579
9     89.3608
10    89.4487
11    89.9822
12    92.3274
13    92.7923
14    95.6369
Name: Score, dtype: float64}]


In [26]:
# Plotly credentials must never be hardcoded in the notebook. They are read
# from the PLOTLY_USERNAME / PLOTLY_API_KEY environment variables instead.
# The API key that was previously committed in this cell should be treated
# as compromised and regenerated.
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')

if plotly_username and plotly_api_key:
    tls.set_credentials_file(username = plotly_username, api_key = plotly_api_key)
# When the variables are not set, no credentials are written and plotly is
# left to use whatever credentials it already has configured.
# NOTE(review): earlier cells already call py.iplot before this cell runs, so
# on a fresh machine the credentials need to be configured beforehand.

layout =  dict(title = 'City Scores',
              xaxis = dict(title = 'City'),
              yaxis = dict(title = 'Score'))
# py.iplot(scores_plotly_graph,filename = 'rated cities scores', layout = layout)

In [27]:
# One distinct colour per city bar (15 entries, in the same order as the rows
# of the scores table). Every RGB channel must lie in 0-255; the eighth entry
# previously used 258 for green, which is out of range.
color_theme = dict(color=['rgba(0,0,0,1)', 'rgba(128,128,128,1)','rgba(102,0,0,1)', 'rgba(0,0,204,1)',
                          'rgba(255,255,0,1)', 'rgba(0,255,128,1)','rgba(0,153,0,1)', 'rgba(125,255,0,1)',
                         'rgba(255,0,0,1)', 'rgba(255,153,153,1)','rgba(255,128,0,1)','rgba(204,153,255,1)',
                         'rgba(0,255,255,1)','rgba(102,0,204,1)','rgba(255,0,255,1)'])
# print (color_theme)

In [28]:
# Final chart: one bar per city score, each bar coloured from color_theme.
colored_scores = go.Bar(x = scores_plotly['City'], y = scores_plotly['Score'], marker = color_theme)
data = [colored_scores]

# The axis titles are set on the layout that is actually handed to the figure;
# the earlier dict that carried them is overwritten here before any figure
# uses it.
layout = go.Layout(title='City Scores',
                   xaxis=dict(title='City'),
                   yaxis=dict(title='Score'))
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='City Score Bar Chart')

<seaborn.axisgrid.PairGrid at 0x2314e815240>