In [1]:
# https://towardsdatascience.com/interactive-controls-for-jupyter-notebooks-f5c94829aee6
import pandas as pd
import seaborn as sns
import statsmodels
import statsmodels.formula.api as smf
import patsy
import os
import matplotlib.pyplot as plt
%matplotlib inline

import ipywidgets as widgets
from ipywidgets import HBox, VBox, interact, interact_manual
import numpy as np
from IPython.display import display
import cufflinks as cf

In [9]:
# Switch cufflinks into offline plotting mode so figures render inside the notebook.
# NOTE(review): `connected=True` presumably loads plotly.js from the CDN instead of
# embedding it in the notebook — confirm against the cufflinks documentation.
cf.go_offline(connected=True)
# Set cufflinks defaults: the 'plotly' colorscale and world-readable sharing.
# NOTE(review): `world_readable=True` looks like a public-sharing flag for online
# plots — confirm it is intended for this data.
cf.set_config_file(colorscale='plotly', world_readable=True)

# Imported here rather than in the first (imports) cell; it is only used for the
# display setting on the next line.
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to echo the value of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

### PM2.5 Data

In [2]:
# Read the PM2.5 class dataset and discard every row that contains a missing value,
# then display the resulting frame.
pm25 = pd.read_csv("Class_PM25_Data.csv").dropna()
pm25

Unnamed: 0,Block_Group_ID,Income,Elevation,PM25_MAR_8
0,4.903510e+11,43646.0,1307.687012,3.651442
1,4.903510e+11,35775.0,1296.852051,3.840161
2,4.903510e+11,38281.0,1289.720459,3.957534
3,4.903510e+11,64091.0,1301.926758,3.945700
4,4.903510e+11,37083.0,1296.852051,3.648757
5,4.903510e+11,21500.0,1301.860962,4.565435
6,4.903510e+11,41154.0,1321.069824,3.173814
7,4.903510e+11,33772.0,1321.069824,3.751625
8,4.903510e+11,37566.0,1321.069824,3.659221
9,4.903510e+11,33438.0,1410.462280,3.562500


In [3]:
# Interactive row filter: a dropdown picks the column, a slider picks the threshold.
@interact
def show_more_than(column=['Income', 'Elevation'],
                   x=(10, 100000, 10)):
    """Return the rows of ``pm25`` whose ``column`` value is strictly greater than ``x``.

    Parameters
    ----------
    column : str
        Column to compare; offered as a dropdown of 'Income' and 'Elevation'.
    x : int
        Threshold; offered as an integer slider built from ``(10, 100000, 10)``.

    Returns
    -------
    pandas.DataFrame
        The matching subset of ``pm25`` (displayed by ``interact``).
    """
    above_threshold = pm25[column] > x
    return pm25.loc[above_threshold]

interactive(children=(Dropdown(description='column', options=('Income', 'Elevation'), value='Income'), IntSlid…

In [4]:
# Interactive correlation between two pm25 columns (the first column is not offered).
@interact
def correlations(column1=list(pm25.columns)[1:],
                 column2=list(pm25.columns)[1:]):
    """Print the correlation between two selected columns of ``pm25``.

    Parameters
    ----------
    column1, column2 : str
        Column names chosen from dropdowns listing every pm25 column except
        the first.

    The coefficient comes from ``Series.corr`` with its default settings.
    """
    left_series = pm25[column1]
    right_series = pm25[column2]
    coefficient = left_series.corr(right_series)
    print(f"Correlation: {coefficient}")

interactive(children=(Dropdown(description='column1', options=('Income', 'Elevation', 'PM25_MAR_8'), value='In…

In [None]:
# Quick linear regression between two pm25 columns chosen from dropdowns.
@interact
def regression(X=list(pm25.columns)[1:],
               y=list(pm25.columns)[1:]):
    """Fit an ordinary-least-squares model of ``y`` on ``X`` and print its summary.

    Parameters
    ----------
    X : str
        Name of the predictor column (right-hand side of the formula).
    y : str
        Name of the response column (left-hand side of the formula).

    Both dropdowns list every pm25 column except the first.
    """
    # In a statsmodels/patsy formula the response sits to the LEFT of `~`,
    # so `y` must come first: y is modelled as a function of X.
    # Q(...) makes patsy look the name up as a column of `data`, which also works
    # for names that are not valid Python identifiers; !r supplies the quoting.
    formula = f"Q({y!r}) ~ Q({X!r})"
    res = smf.ols(formula=formula, data=pm25).fit()
    print(res.summary())

In [15]:
# Interactive scatter plot of one pm25 column against another.
@interact
def scatter_plot(x=list(pm25.columns),
                 y=list(pm25.columns)[1:]):
    """Draw a marker-only scatter of ``pm25[y]`` against ``pm25[x]`` with ``iplot``.

    Parameters
    ----------
    x : str
        Column for the horizontal axis; the dropdown lists every pm25 column.
    y : str
        Column for the vertical axis; the dropdown omits the first column.

    Axis titles and the figure title use ``str.title()`` on the column names.
    """
    x_label = x.title()
    y_label = y.title()
    pm25.iplot(kind='scatter', x=x, y=y, mode='markers',
               xTitle=x_label, yTitle=y_label,
               title=f'{y_label} vs {x_label}')

    # TODO: regression overlay — the attempt below is disabled because it does not work.
    # NOTE(review): probable cause — the working call above passes a column *name*
    # as `y`, whereas this attempt passes the values returned by res.predict();
    # the formula also fits x as a function of y. Confirm before re-enabling.
    #mod = smf.ols(formula='pm25[x] ~ pm25[y]', data=pm25)
    #res = mod.fit()
    #pm25.iplot(kind='scatter', x=x, y=res.predict(), mode='markers')

interactive(children=(Dropdown(description='x', options=('Block_Group_ID', 'Income', 'Elevation', 'PM25_MAR_8'…

In [9]:
# Print the built-in help for the DataFrame's `iplot` method, to look up the
# keyword arguments it accepts (kind, mode, xTitle, yTitle, title, ...).
help(pm25.iplot)

Help on method _iplot in module cufflinks.plotlytools:

_iplot(kind='scatter', data=None, layout=None, filename='', sharing=None, title='', xTitle='', yTitle='', zTitle='', theme=None, colors=None, colorscale=None, fill=False, width=None, dash='solid', mode='lines', interpolation='linear', symbol='circle', size=12, barmode='', sortbars=False, bargap=None, bargroupgap=None, bins=None, histnorm='', histfunc='count', orientation='v', boxpoints=False, annotations=None, keys=False, bestfit=False, bestfit_colors=None, mean=False, mean_colors=None, categories='', x='', y='', z='', text='', gridcolor=None, zerolinecolor=None, margin=None, labels=None, values=None, secondary_y='', secondary_y_title='', subplots=False, shape=None, error_x=None, error_y=None, error_type='data', locations=None, lon=None, lat=None, asFrame=False, asDates=False, asFigure=False, asImage=False, dimensions=None, asPlot=False, asUrl=False, online=None, **kwargs) method of pandas.core.frame.DataFrame instance
           

### Ozone Data

In [8]:
# Read the ozone class dataset, discard rows with any missing value,
# and preview the first rows.
ozone = pd.read_csv("Class_Ozone_Data.csv").dropna()
ozone.head()

Unnamed: 0,Block_Group_ID,Income,Elevation,Ozone_AUG_10,Ozone_AUG_15
0,490351001002,43646.0,1307.687012,27.557288,52.544356
1,490351003061,35775.0,1296.852051,26.284309,54.727856
2,490351003062,38281.0,1289.720459,27.685072,52.71177
3,490351006003,64091.0,1301.926758,27.599777,53.07081
4,490351006004,37083.0,1296.852051,28.802653,55.096362


In [9]:
# Interactive correlation between two ozone columns (the first column is not offered).
# NOTE: this reuses the name `correlations` from the PM2.5 section, so after this
# cell runs the name refers to the ozone version.
@interact
def correlations(column1=list(ozone.columns)[1:],
                 column2=list(ozone.columns)[1:]):
    """Print the correlation between two selected columns of ``ozone``.

    Parameters
    ----------
    column1, column2 : str
        Column names chosen from dropdowns listing every ozone column except
        the first.

    The coefficient comes from ``Series.corr`` with its default settings.
    """
    left_series = ozone[column1]
    right_series = ozone[column2]
    coefficient = left_series.corr(right_series)
    print(f"Correlation: {coefficient}")

interactive(children=(Dropdown(description='column1', options=('Income', 'Elevation', 'Ozone_AUG_10', 'Ozone_A…

In [10]:
# Quick linear regression between two ozone columns chosen from dropdowns.
@interact
def regression(X=list(ozone.columns)[1:],
               y=list(ozone.columns)[1:]):
    """Fit an ordinary-least-squares model of ``y`` on ``X`` and print its summary.

    Parameters
    ----------
    X : str
        Name of the predictor column (right-hand side of the formula).
    y : str
        Name of the response column (left-hand side of the formula).

    Both dropdowns list every ozone column except the first.
    """
    # In a statsmodels/patsy formula the response sits to the LEFT of `~`,
    # so `y` must come first: y is modelled as a function of X.
    # Q(...) makes patsy look the name up as a column of `data`, which also works
    # for names that are not valid Python identifiers; !r supplies the quoting.
    formula = f"Q({y!r}) ~ Q({X!r})"
    res = smf.ols(formula=formula, data=ozone).fit()
    print(res.summary())

interactive(children=(Dropdown(description='X', options=('Income', 'Elevation', 'Ozone_AUG_10', 'Ozone_AUG_15'…