In [None]:
# Import the required libraries 

import pandas as pd
from sodapy import Socrata
import numpy as np
import matplotlib
import cufflinks as cf
import plotly
import plotly.offline as py
import plotly.graph_objs as go

# One-time notebook setup: switch cufflinks/plotly to offline rendering and
# silence Python warnings for the rest of the session.
cf.go_offline()  # required to use plotly offline (no account required).
py.init_notebook_mode()  # graphs charts inline (IPython).

import warnings

# A single "ignore" filter is enough; registering it a second time has no
# additional effect.
warnings.filterwarnings("ignore")

# Fetch a sample of the "9wzi-peqs" dataset from data.medicare.gov and chart
# how the values of two quality-measure columns are distributed.
# The second constructor argument is None, i.e. no credentials are supplied.
client = Socrata("data.medicare.gov", None)
try:
    # First 100 results, returned as JSON from API / converted to Python list of
    # dictionaries by sodapy.
    results = client.get("9wzi-peqs", limit=100)
finally:
    # Release the client's HTTP session even when the request raises.
    client.close()

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

results_df.head(10)

results_df.columns

# Stack the two measure columns into one Series so their values can be
# counted together.
performance = pd.concat([results_df.how_often_the_home_health_team_began_their_patients_care_in_a_timely_manner,
                        results_df.how_often_the_home_health_team_checked_patients_for_depression])

performance

# Bar chart of how many times each distinct value occurs across both columns.
performance.value_counts().iplot(kind='bar')

In [None]:
# NYPD Collisions analysis
# The query string asks for at most 1,000,000 rows whose `date` lies between
# 2017-01-01T00:00:00 and 2018-01-01T00:00:00 (the filter is URL-encoded).
url = (
    'https://data.cityofnewyork.us/resource/qiz3-axqb.json'
    '?$limit=1000000'
    '&$where=date%20between%20%272017-01-01T00:00:00%27'
    '%20and%20%272018-01-01T00:00:00%27'
)
# Download the JSON response and parse it straight into a DataFrame.
collisions = pd.read_json(url)

In [None]:
# Show the column labels of the `collisions` DataFrame.
collisions.columns

In [None]:
# Stack the contributing-factor columns for vehicles 1 through 5 end to end
# into a single Series, in vehicle order.
factor_columns = ['contributing_factor_vehicle_{}'.format(slot) for slot in range(1, 6)]
contributing_factors = pd.concat([collisions[column] for column in factor_columns])

In [None]:
# Count how often each contributing factor occurs, then draw the counts as a
# bar chart.
factor_counts = contributing_factors.value_counts()
factor_counts.iplot(kind='bar')

In [None]:
# Horizontal bar chart of contributing-factor counts, leaving out the
# 'Unspecified' entry and ordering the bars by count.
temp = pd.DataFrame({'contributing_factors': contributing_factors.value_counts()})
df = temp[temp.index != 'Unspecified']
# Ascending sort: a horizontal bar chart draws the first row at the bottom,
# so the largest count ends up at the top.
df = df.sort_values(by='contributing_factors', ascending=True)
# A plain list of traces replaces the deprecated go.Data wrapper.
data = [
    go.Bar(
        y=df.index,
        x=df.contributing_factors,
        orientation='h'
    )
]
layout = go.Layout(
    height=1000,
    # Wide left margin leaves room for the factor names used as y labels.
    # A dict replaces the deprecated go.Margin wrapper.
    margin=dict(l=300),
    title="Contributing Factors for Vehicle Collisions in 2017-18"
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [None]:
# transform the date column into an actual date object so that plotly is able to graph it in a time series.
collisions['date'] = pd.to_datetime(collisions['date'])

# df is sorted by date.
# sort_values orders the rows directly; the previous approach passed index
# *labels* to the positional .iloc indexer, which is only correct while the
# frame still has its default 0..n-1 index.
df_by_date = collisions.sort_values('date')

In [None]:
# Group the date-sorted collisions by their `date` value, then derive two
# per-date series from the same grouping: the number of rows and the summed
# `number_of_persons_killed`. Each series is charted with its .iplot method.
per_date = df_by_date.groupby('date')

collisions_by_date = per_date['date'].count()
collisions_by_date.iplot(kind='scatter', title='Collisions Per Day')

deaths_by_date = per_date['number_of_persons_killed'].sum()
deaths_by_date.iplot(kind='bar', title='Deaths per Day')

In [None]:
# Overlay the two per-date series on one chart with two y axes:
# collisions as a line on the left axis, deaths as markers on the right axis.
colli_deaths = pd.DataFrame({'collisions': collisions_by_date, 'deaths': deaths_by_date})

# One colour per series; each is applied to both the trace and its axis so
# the reader can tell which axis a trace belongs to.
color1 = '#9467bd'
color2 = '#F08B00'

trace1 = go.Scatter(
    x=colli_deaths.index,
    y=colli_deaths['collisions'],
    name='collisions',
    line=dict(color=color1)
)
trace2 = go.Scatter(
    x=colli_deaths.index,
    y=colli_deaths['deaths'],
    name='deaths',
    yaxis='y2',  # plot against the secondary (right-hand) axis
    mode='markers',
    # Match the markers to the colour of the deaths axis, just as the
    # collisions line matches the collisions axis.
    marker=dict(color=color2)
)
data = [trace1, trace2]
layout = go.Layout(
    title="Collisions and Deaths per day",
    yaxis=dict(
        title='collisions',
        titlefont=dict(color=color1),
        tickfont=dict(color=color1)
    ),
    yaxis2=dict(
        title='deaths',
        overlaying='y',  # draw on top of the primary axis instead of in a new subplot
        side='right',
        titlefont=dict(color=color2),
        tickfont=dict(color=color2)
    )
)
fig = go.Figure(data=data, layout=layout)
plot_url = py.iplot(fig)