# K-Means Clustering Method from Kaggle

Source: https://www.kaggle.com/minc33/visualizing-high-dimensional-clusters/notebook

In [5]:
# Core numerical / tabular libraries
import numpy as np
import pandas as pd
# Lift pandas' display limits so DataFrames are rendered without truncating columns or rows.
# NOTE(review): with max_rows=None, displaying a whole frame prints every row — keep using .head().
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# scikit-learn imports
from sklearn.decomposition import PCA # Principal Component Analysis
from sklearn.manifold import TSNE # t-Distributed Stochastic Neighbor Embedding
from sklearn.cluster import KMeans # K-Means clustering
from sklearn.preprocessing import StandardScaler # feature scaling (standardization)

# plotly imports
# NOTE(review): `py` here is the top-level plotly package, not `plotly.plotly` — confirm intended use.
import plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Prep data

In [7]:
# Load the dummy-encoded trip table; the first CSV column becomes the row index.
X = pd.read_csv(
    'trip_dummies_df.csv',
    index_col=0,
)

In [8]:
# Preview the first five rows of the raw dummy table.
X.head(n=5)

Unnamed: 0,trip_id,uniqueid,number_of_trips_taken,qtemphigh,qtemplow,qprecipitation,allwt,trip_id_count,qday_Friday,qday_Monday,qday_Saturday,qday_Sunday,qday_Thursday,qday_Tuesday,qday_Wednesday,qday1typical_Don't know,qday1typical_Not at all typical,qday1typical_Not very typical,qday1typical_Refused,qday1typical_Somewhat typical,qday1typical_Very typical,qborough_home_Brooklyn,qborough_home_Manhattan,qborough_home_Queens,qborough_home_Staten Island,qborough_home_The Bronx,qsurveyzone_home_Inner Brooklyn,qsurveyzone_home_Inner Queens,qsurveyzone_home_Manhattan Core,qsurveyzone_home_Middle Queens,qsurveyzone_home_Northern Bronx,qsurveyzone_home_Northern Manhattan,qsurveyzone_home_Outer Brooklyn,qsurveyzone_home_Outer Queens,qsurveyzone_home_Southern Bronx,qsurveyzone_home_Staten Island,qtripdaytime_AM,qtripdaytime_NOON,qtripdaytime_PM,qborough_start_Brooklyn,qborough_start_Don't know,qborough_start_Inner Brooklyn,qborough_start_Inner Queens,qborough_start_Manhattan,qborough_start_Manhattan Core,qborough_start_Middle Queens,qborough_start_Northern Bronx,qborough_start_Northern Manhattan,qborough_start_Outer Brooklyn,qborough_start_Outer Queens,qborough_start_Outside of NYC,qborough_start_Queens,qborough_start_Refused,qborough_start_Southern Bronx,qborough_start_Staten Island,qborough_start_The Bronx,surveyzone_start_Inner Brooklyn,surveyzone_start_Inner Queens,surveyzone_start_Manhattan Core,surveyzone_start_Middle Queens,surveyzone_start_NOT CODED,surveyzone_start_Northern Bronx,surveyzone_start_Northern Manhattan,surveyzone_start_Outer Brooklyn,surveyzone_start_Outer Queens,surveyzone_start_Southern Bronx,surveyzone_start_Staten Island,qday1tripend_Child's daycare facility or school,qday1tripend_Doctor's office or hospital,"qday1tripend_Entertainment event (i.e. 
sporting event, play, etc.)",qday1tripend_Friend or family member's home,"qday1tripend_Grocery store or market (including deli, bodega, etc.)",qday1tripend_Home,qday1tripend_Other,qday1tripend_Outside of New York City,qday1tripend_Park/Recreational area/Gym,qday1tripend_Refused,qday1tripend_Restaurant or bar,"qday1tripend_Retail store (e.g. clothing, electronic, hardware, etc.)",qday1tripend_School,qday1tripend_Work,qborough_end_Brooklyn,qborough_end_Don't know,qborough_end_Manhattan,qborough_end_Outside of NYC,qborough_end_Queens,qborough_end_Refused,qborough_end_Staten Island,qborough_end_The Bronx,surveyzone_end_Inner Brooklyn,surveyzone_end_Inner Queens,surveyzone_end_Manhattan Core,surveyzone_end_Middle Queens,surveyzone_end_Northern Bronx,surveyzone_end_Northern Manhattan,surveyzone_end_Outer Brooklyn,surveyzone_end_Outer Queens,surveyzone_end_Southern Bronx,surveyzone_end_Staten Island,qday1tripendcode_Airport,qday1tripendcode_Bus stop,qday1tripendcode_Child's daycare facility or school,qday1tripendcode_Commuter rail station,qday1tripendcode_Doctor's office or hospital,"qday1tripendcode_Entertainment event (i.e. sporting event, play, etc.)",qday1tripendcode_Friend or family member's home,qday1tripendcode_Grand Central Station,"qday1tripendcode_Grocery store or market (including deli, bodega, etc.)",qday1tripendcode_Home,qday1tripendcode_Other,qday1tripendcode_PATH Station,qday1tripendcode_Park and ride/parking lot,qday1tripendcode_Park/Recreational area/Gym,qday1tripendcode_Penn Station,qday1tripendcode_Refused,qday1tripendcode_Restaurant or bar,"qday1tripendcode_Retail store (e.g. 
clothing, electronic, hardware, etc.)",qday1tripendcode_Road/tunnel/bridge,qday1tripendcode_School,qday1tripendcode_Work,qday1triplength_cat_0 to 5,qday1triplength_cat_11 to 15,qday1triplength_cat_16 to 20,qday1triplength_cat_180+,qday1triplength_cat_21 to 25,qday1triplength_cat_26 to 30,qday1triplength_cat_31 to 35,qday1triplength_cat_36 to 40,qday1triplength_cat_41 to 45,qday1triplength_cat_46 to 50,qday1triplength_cat_51 to 55,qday1triplength_cat_56 to 60,qday1triplength_cat_6 to 10,qday1triplength_cat_61 to 180,qgender_Female,qgender_Male,qgender_Refused,"qincome_$100,000 - $149,999","qincome_$15,000 - $24,999","qincome_$150,000-$199,999","qincome_$200,000 and above","qincome_$25,000 - $34,999","qincome_$35,000 - $49,999","qincome_$50,000 - $74,999","qincome_$75,000 - $99,999","qincome_Less than $14,999",qincome_Refused,qdisability1_No,qdisability1_Yes,qdisability2_No,qdisability2_Yes,qdisability3_No,qdisability3_Yes,qdisability4_No,qdisability4_Yes,qdisability5_No,qdisability5_Yes,qdisability6_No,qdisability6_Yes,qdisability7_No,qdisability7_Yes,qdisability8_No,qdisability8_Yes,qdisability9_No,qdisability9_Yes,qagecode_18-24,qagecode_25-34,qagecode_35-44,qagecode_45-54,qagecode_55-64,qagecode_65 or older,"qracecode_Asian, Non-Hispanic","qracecode_Black, Non-Hispanic","qracecode_Don't know, Non-Hispanic",qracecode_Hispanic,"qracecode_Other, Non-Hispanic","qracecode_White, Non-Hispanic","qeducation_Associate degree (i.e., AA, AS)","qeducation_Bachelor's degree (i.e., BA, BS, AB)","qeducation_Graduate degree (i.e., Master's, Professional, Doctorate)","qeducation_High school graduate or equivalent (i.e., GED)",qeducation_No high school,qeducation_Some college but degree not received or in progress,qeducation_Some high school,qlicense_No,qlicense_Refused,qlicense_Yes,qcaraccess_I do not have access to a car,"qcaraccess_I do not personally own or lease a car, but I have access to a car belonging to a member of my household",qcaraccess_I personally own or lease a 
car,qcaraccess_Other,qcaraccess_Refused,qwelfare1_No,qwelfare1_Yes,qwelfare2_No,qwelfare2_Yes,qwelfare3_No,qwelfare3_Yes,qwelfare4_No,qwelfare4_Yes,qwelfare5_No,qwelfare5_Yes,qsmartphone_No,qsmartphone_Refused,qsmartphone_Yes,qshare1_No,qshare1_Yes,qshare2_No,qshare2_Yes,qshare3_No,qshare3_Yes,qshare4_No,qshare4_Yes,qshare5_No,qshare5_Yes,qshare6_No,qshare6_Yes,qshare7_No,qshare7_Yes,qshare8_No,qshare8_Yes,qcitibike_No,qcitibike_Refused,qcitibike_Yes,qday1triptravelcode_sp_Car Service,qday1triptravelcode_sp_Carpool,qday1triptravelcode_sp_Carshare,qday1triptravelcode_sp_Citi Bike,qday1triptravelcode_sp_Community van/dollar van,qday1triptravelcode_sp_Commuter rail,qday1triptravelcode_sp_Don't know,qday1triptravelcode_sp_Electric bicycle,qday1triptravelcode_sp_Express bus,qday1triptravelcode_sp_Green taxi,qday1triptravelcode_sp_Local bus,qday1triptravelcode_sp_Motorcycle,qday1triptravelcode_sp_Other,qday1triptravelcode_sp_Other ferry,qday1triptravelcode_sp_PATH train,qday1triptravelcode_sp_Paratransit/ Access-A-Ride,qday1triptravelcode_sp_Personal bicycle,qday1triptravelcode_sp_Personal car,qday1triptravelcode_sp_Refused,qday1triptravelcode_sp_Ride-hail service such as Uber or Lyft,qday1triptravelcode_sp_Select bus service,"qday1triptravelcode_sp_Shared-ride service such a Uber Pool, Via, or Lyft Line",qday1triptravelcode_sp_Staten Island ferry,qday1triptravelcode_sp_Subway,qday1triptravelcode_sp_Walk,qday1triptravelcode_sp_Yellow taxi,qmodegrouping_Bike,qmodegrouping_Bus,qmodegrouping_Car,qmodegrouping_Commuter Rail,qmodegrouping_Ferry,qmodegrouping_For-Hire Vehicle,qmodegrouping_Other,qmodegrouping_Subway,qmodegrouping_Walk,qsustainablemode_No,qsustainablemode_Yes,qpurposerecode_Accompanying other traveler,qpurposerecode_Business,qpurposerecode_Commute to/from work,qpurposerecode_Dining,qpurposerecode_Medical visit (doctor's office),qpurposerecode_Other,qpurposerecode_Personal 
errands,qpurposerecode_Refused,qpurposerecode_School,qpurposerecode_Shopping,qpurposerecode_Social/recreation
0,3101674,101674,3.0,79.0,58.0,0.0,2.269123,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
1,11202717,10202717,3.0,66.0,51.0,0.25,0.868687,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
2,12202717,10202717,3.0,66.0,51.0,0.25,0.868687,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1
3,51200330,50200330,4.0,73.0,57.0,0.0,2.433119,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1
4,52200330,50200330,4.0,73.0,57.0,0.0,2.433119,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1


In [9]:
# Pick out the four continuous (non-dummy) features; they are standardized in a later cell.
numer = X[
    [
        "number_of_trips_taken",
        "qtemphigh",
        "qtemplow",
        "qprecipitation",
    ]
]

In [15]:
# Select the one-hot (dummy) columns of X to keep as categorical features.
# Left out on purpose: the "Refused" / "Don't know" dummies and the demographic
# groups (income, race, education, gender, smartphone, qshare*) that are listed
# in the "removed features" cell at the end of the notebook.
# Column names must match X's columns exactly; a missing name raises KeyError.
# TODO: consider removing further demographic variables unrelated to mobility.

categ = X[["qday_Friday","qday_Monday","qday_Saturday","qday_Sunday","qday_Thursday","qday_Tuesday",
           "qday_Wednesday","qday1typical_Not at all typical",
           "qday1typical_Not very typical","qday1typical_Somewhat typical",
           "qday1typical_Very typical","qborough_home_Brooklyn","qborough_home_Manhattan",
           "qborough_home_Queens","qborough_home_Staten Island","qborough_home_The Bronx",
           "qsurveyzone_home_Inner Brooklyn","qsurveyzone_home_Inner Queens",
           "qsurveyzone_home_Manhattan Core","qsurveyzone_home_Middle Queens",
           "qsurveyzone_home_Northern Bronx","qsurveyzone_home_Northern Manhattan",
           "qsurveyzone_home_Outer Brooklyn","qsurveyzone_home_Outer Queens",
           "qsurveyzone_home_Southern Bronx", "qsurveyzone_home_Staten Island",
           "qtripdaytime_AM","qtripdaytime_NOON","qtripdaytime_PM","qborough_start_Brooklyn",
           "qborough_start_Inner Brooklyn",
           "qborough_start_Inner Queens","qborough_start_Manhattan","qborough_start_Manhattan Core",
           "qborough_start_Middle Queens","qborough_start_Northern Bronx","qborough_start_Northern Manhattan",
           "qborough_start_Outer Brooklyn","qborough_start_Outer Queens","qborough_start_Outside of NYC","qborough_start_Queens",
           "qborough_start_Southern Bronx","qborough_start_Staten Island",
           "qborough_start_The Bronx","surveyzone_start_Inner Brooklyn","surveyzone_start_Inner Queens",
           "surveyzone_start_Manhattan Core","surveyzone_start_Middle Queens","surveyzone_start_NOT CODED","surveyzone_start_Northern Bronx",
           "surveyzone_start_Northern Manhattan","surveyzone_start_Outer Brooklyn","surveyzone_start_Outer Queens",
           "surveyzone_start_Southern Bronx","surveyzone_start_Staten Island","qday1tripend_Child's daycare facility or school",
           "qday1tripend_Doctor's office or hospital","qday1tripend_Entertainment event (i.e. sporting event, play, etc.)",
           "qday1tripend_Friend or family member's home","qday1tripend_Grocery store or market (including deli, bodega, etc.)","qday1tripend_Home",
           "qday1tripend_Other","qday1tripend_Outside of New York City","qday1tripend_Park/Recreational area/Gym","qday1tripend_Restaurant or bar",
           "qday1tripend_Retail store (e.g. clothing, electronic, hardware, etc.)",
           "qday1tripend_School","qday1tripend_Work","qborough_end_Brooklyn",
           "qborough_end_Manhattan","qborough_end_Outside of NYC","qborough_end_Queens",
           "qborough_end_Staten Island","qborough_end_The Bronx",
           "surveyzone_end_Inner Brooklyn","surveyzone_end_Inner Queens",
           "surveyzone_end_Manhattan Core","surveyzone_end_Middle Queens",
           "surveyzone_end_Northern Bronx","surveyzone_end_Northern Manhattan",
           "surveyzone_end_Outer Brooklyn","surveyzone_end_Outer Queens",
           "surveyzone_end_Southern Bronx","surveyzone_end_Staten Island","qday1tripendcode_Airport",
           "qday1tripendcode_Bus stop","qday1tripendcode_Child's daycare facility or school",
           "qday1tripendcode_Commuter rail station","qday1tripendcode_Doctor's office or hospital",
           "qday1tripendcode_Entertainment event (i.e. sporting event, play, etc.)",
           "qday1tripendcode_Friend or family member's home","qday1tripendcode_Grand Central Station",
           "qday1tripendcode_Grocery store or market (including deli, bodega, etc.)",
           "qday1tripendcode_Home","qday1tripendcode_Other","qday1tripendcode_PATH Station","qday1tripendcode_Park and ride/parking lot",
           "qday1tripendcode_Park/Recreational area/Gym","qday1tripendcode_Penn Station",
           "qday1tripendcode_Restaurant or bar","qday1tripendcode_Retail store (e.g. clothing, electronic, hardware, etc.)",
           "qday1tripendcode_Road/tunnel/bridge","qday1tripendcode_School","qday1tripendcode_Work",
           "qday1triplength_cat_0 to 5","qday1triplength_cat_11 to 15","qday1triplength_cat_16 to 20",
           "qday1triplength_cat_180+","qday1triplength_cat_21 to 25","qday1triplength_cat_26 to 30",
           "qday1triplength_cat_31 to 35","qday1triplength_cat_36 to 40","qday1triplength_cat_41 to 45",
           "qday1triplength_cat_46 to 50","qday1triplength_cat_51 to 55","qday1triplength_cat_56 to 60",
           "qday1triplength_cat_6 to 10","qday1triplength_cat_61 to 180","qdisability1_No","qdisability1_Yes","qdisability2_No",
           "qdisability2_Yes","qdisability3_No","qdisability3_Yes","qdisability4_No","qdisability4_Yes",
           "qdisability5_No","qdisability5_Yes","qdisability6_No","qdisability6_Yes","qdisability7_No",
           "qdisability7_Yes","qdisability8_No","qdisability8_Yes","qdisability9_No","qdisability9_Yes","qagecode_18-24","qagecode_25-34",
           "qagecode_35-44","qagecode_45-54","qagecode_55-64","qagecode_65 or older",
           "qlicense_No","qlicense_Yes","qcaraccess_I do not have access to a car",
           "qcaraccess_I do not personally own or lease a car, but I have access to a car belonging to a member of my household",
           "qcaraccess_I personally own or lease a car","qcaraccess_Other","qwelfare1_No",
           "qwelfare1_Yes","qwelfare2_No","qwelfare2_Yes","qwelfare3_No","qwelfare3_Yes","qwelfare4_No",
           "qwelfare4_Yes","qwelfare5_No","qwelfare5_Yes","qcitibike_No","qcitibike_Yes","qday1triptravelcode_sp_Car Service",
           "qday1triptravelcode_sp_Carpool","qday1triptravelcode_sp_Carshare","qday1triptravelcode_sp_Citi Bike",
           "qday1triptravelcode_sp_Community van/dollar van","qday1triptravelcode_sp_Commuter rail",
           "qday1triptravelcode_sp_Electric bicycle","qday1triptravelcode_sp_Express bus","qday1triptravelcode_sp_Green taxi",
           "qday1triptravelcode_sp_Local bus","qday1triptravelcode_sp_Motorcycle","qday1triptravelcode_sp_Other",
           "qday1triptravelcode_sp_Other ferry","qday1triptravelcode_sp_PATH train","qday1triptravelcode_sp_Paratransit/ Access-A-Ride",
           "qday1triptravelcode_sp_Personal bicycle","qday1triptravelcode_sp_Personal car",
           "qday1triptravelcode_sp_Ride-hail service such as Uber or Lyft","qday1triptravelcode_sp_Select bus service",
           "qday1triptravelcode_sp_Shared-ride service such a Uber Pool, Via, or Lyft Line",
           "qday1triptravelcode_sp_Staten Island ferry","qday1triptravelcode_sp_Subway","qday1triptravelcode_sp_Walk",
           "qday1triptravelcode_sp_Yellow taxi","qmodegrouping_Bike","qmodegrouping_Bus","qmodegrouping_Car",
           "qmodegrouping_Commuter Rail","qmodegrouping_Ferry","qmodegrouping_For-Hire Vehicle","qmodegrouping_Other",
           "qmodegrouping_Subway","qmodegrouping_Walk","qsustainablemode_No","qsustainablemode_Yes","qpurposerecode_Accompanying other traveler",
           "qpurposerecode_Business","qpurposerecode_Commute to/from work","qpurposerecode_Dining",
           "qpurposerecode_Medical visit (doctor's office)","qpurposerecode_Other","qpurposerecode_Personal errands",
           "qpurposerecode_School","qpurposerecode_Shopping","qpurposerecode_Social/recreation"]]




In [16]:
# Create the scaler used to standardize the numeric features.
# The arguments below are StandardScaler's defaults, spelled out for readability.
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [17]:
# Standardize the numeric features with the scaler.
# fit_transform returns a bare array, so the DataFrame is rebuilt with the
# original row index and column names. Without them the frame would get a fresh
# 0..n-1 index, and the later index-aligned inner join with `categ` would
# silently drop or mismatch rows whenever X's index is not exactly 0..n-1.
numer = pd.DataFrame(
    scaler.fit_transform(numer),
    index=numer.index,
    columns=numer.columns,
)

In [21]:
# Label the scaled columns by position; the order must match the order in which
# the numeric features were selected from X.
numer.columns = [
    "number_of_trips_taken",
    "qtemphigh",
    "qtemplow",
    "qprecipitation",
]

In [22]:
# Put the scaled numeric features and the dummy features side by side.
# Rows are matched on index labels; join='inner' keeps only labels present in both frames.
X = pd.concat(
    [numer, categ],
    axis='columns',
    join='inner',
)

In [23]:
# Preview the first five rows of the combined feature table.
X.head(n=5)

Unnamed: 0,number_of_trips_taken,qtemphigh,qtemplow,qprecipitation,qday_Friday,qday_Monday,qday_Saturday,qday_Sunday,qday_Thursday,qday_Tuesday,qday_Wednesday,qday1typical_Not at all typical,qday1typical_Not very typical,qday1typical_Somewhat typical,qday1typical_Very typical,qborough_home_Brooklyn,qborough_home_Manhattan,qborough_home_Queens,qborough_home_Staten Island,qborough_home_The Bronx,qsurveyzone_home_Inner Brooklyn,qsurveyzone_home_Inner Queens,qsurveyzone_home_Manhattan Core,qsurveyzone_home_Middle Queens,qsurveyzone_home_Northern Bronx,qsurveyzone_home_Northern Manhattan,qsurveyzone_home_Outer Brooklyn,qsurveyzone_home_Outer Queens,qsurveyzone_home_Southern Bronx,qsurveyzone_home_Staten Island,qtripdaytime_AM,qtripdaytime_NOON,qtripdaytime_PM,qborough_start_Brooklyn,qborough_start_Inner Brooklyn,qborough_start_Inner Queens,qborough_start_Manhattan,qborough_start_Manhattan Core,qborough_start_Middle Queens,qborough_start_Northern Bronx,qborough_start_Northern Manhattan,qborough_start_Outer Brooklyn,qborough_start_Outer Queens,qborough_start_Outside of NYC,qborough_start_Queens,qborough_start_Southern Bronx,qborough_start_Staten Island,qborough_start_The Bronx,surveyzone_start_Inner Brooklyn,surveyzone_start_Inner Queens,surveyzone_start_Manhattan Core,surveyzone_start_Middle Queens,surveyzone_start_NOT CODED,surveyzone_start_Northern Bronx,surveyzone_start_Northern Manhattan,surveyzone_start_Outer Brooklyn,surveyzone_start_Outer Queens,surveyzone_start_Southern Bronx,surveyzone_start_Staten Island,qday1tripend_Child's daycare facility or school,qday1tripend_Doctor's office or hospital,"qday1tripend_Entertainment event (i.e. sporting event, play, etc.)",qday1tripend_Friend or family member's home,"qday1tripend_Grocery store or market (including deli, bodega, etc.)",qday1tripend_Home,qday1tripend_Other,qday1tripend_Outside of New York City,qday1tripend_Park/Recreational area/Gym,qday1tripend_Restaurant or bar,"qday1tripend_Retail store (e.g. 
clothing, electronic, hardware, etc.)",qday1tripend_School,qday1tripend_Work,qborough_end_Brooklyn,qborough_end_Manhattan,qborough_end_Outside of NYC,qborough_end_Queens,qborough_end_Staten Island,qborough_end_The Bronx,surveyzone_end_Inner Brooklyn,surveyzone_end_Inner Queens,surveyzone_end_Manhattan Core,surveyzone_end_Middle Queens,surveyzone_end_Northern Bronx,surveyzone_end_Northern Manhattan,surveyzone_end_Outer Brooklyn,surveyzone_end_Outer Queens,surveyzone_end_Southern Bronx,surveyzone_end_Staten Island,qday1tripendcode_Airport,qday1tripendcode_Bus stop,qday1tripendcode_Child's daycare facility or school,qday1tripendcode_Commuter rail station,qday1tripendcode_Doctor's office or hospital,"qday1tripendcode_Entertainment event (i.e. sporting event, play, etc.)",qday1tripendcode_Friend or family member's home,qday1tripendcode_Grand Central Station,"qday1tripendcode_Grocery store or market (including deli, bodega, etc.)",qday1tripendcode_Home,qday1tripendcode_Other,qday1tripendcode_PATH Station,qday1tripendcode_Park and ride/parking lot,qday1tripendcode_Park/Recreational area/Gym,qday1tripendcode_Penn Station,qday1tripendcode_Restaurant or bar,"qday1tripendcode_Retail store (e.g. 
clothing, electronic, hardware, etc.)",qday1tripendcode_Road/tunnel/bridge,qday1tripendcode_School,qday1tripendcode_Work,qday1triplength_cat_0 to 5,qday1triplength_cat_11 to 15,qday1triplength_cat_16 to 20,qday1triplength_cat_180+,qday1triplength_cat_21 to 25,qday1triplength_cat_26 to 30,qday1triplength_cat_31 to 35,qday1triplength_cat_36 to 40,qday1triplength_cat_41 to 45,qday1triplength_cat_46 to 50,qday1triplength_cat_51 to 55,qday1triplength_cat_56 to 60,qday1triplength_cat_6 to 10,qday1triplength_cat_61 to 180,qdisability1_No,qdisability1_Yes,qdisability2_No,qdisability2_Yes,qdisability3_No,qdisability3_Yes,qdisability4_No,qdisability4_Yes,qdisability5_No,qdisability5_Yes,qdisability6_No,qdisability6_Yes,qdisability7_No,qdisability7_Yes,qdisability8_No,qdisability8_Yes,qdisability9_No,qdisability9_Yes,qagecode_18-24,qagecode_25-34,qagecode_35-44,qagecode_45-54,qagecode_55-64,qagecode_65 or older,qlicense_No,qlicense_Yes,qcaraccess_I do not have access to a car,"qcaraccess_I do not personally own or lease a car, but I have access to a car belonging to a member of my household",qcaraccess_I personally own or lease a car,qcaraccess_Other,qwelfare1_No,qwelfare1_Yes,qwelfare2_No,qwelfare2_Yes,qwelfare3_No,qwelfare3_Yes,qwelfare4_No,qwelfare4_Yes,qwelfare5_No,qwelfare5_Yes,qcitibike_No,qcitibike_Yes,qday1triptravelcode_sp_Car Service,qday1triptravelcode_sp_Carpool,qday1triptravelcode_sp_Carshare,qday1triptravelcode_sp_Citi Bike,qday1triptravelcode_sp_Community van/dollar van,qday1triptravelcode_sp_Commuter rail,qday1triptravelcode_sp_Electric bicycle,qday1triptravelcode_sp_Express bus,qday1triptravelcode_sp_Green taxi,qday1triptravelcode_sp_Local bus,qday1triptravelcode_sp_Motorcycle,qday1triptravelcode_sp_Other,qday1triptravelcode_sp_Other ferry,qday1triptravelcode_sp_PATH train,qday1triptravelcode_sp_Paratransit/ Access-A-Ride,qday1triptravelcode_sp_Personal bicycle,qday1triptravelcode_sp_Personal car,qday1triptravelcode_sp_Ride-hail service such as Uber or 
Lyft,qday1triptravelcode_sp_Select bus service,"qday1triptravelcode_sp_Shared-ride service such a Uber Pool, Via, or Lyft Line",qday1triptravelcode_sp_Staten Island ferry,qday1triptravelcode_sp_Subway,qday1triptravelcode_sp_Walk,qday1triptravelcode_sp_Yellow taxi,qmodegrouping_Bike,qmodegrouping_Bus,qmodegrouping_Car,qmodegrouping_Commuter Rail,qmodegrouping_Ferry,qmodegrouping_For-Hire Vehicle,qmodegrouping_Other,qmodegrouping_Subway,qmodegrouping_Walk,qsustainablemode_No,qsustainablemode_Yes,qpurposerecode_Accompanying other traveler,qpurposerecode_Business,qpurposerecode_Commute to/from work,qpurposerecode_Dining,qpurposerecode_Medical visit (doctor's office),qpurposerecode_Other,qpurposerecode_Personal errands,qpurposerecode_School,qpurposerecode_Shopping,qpurposerecode_Social/recreation
0,-0.334052,0.468198,0.285697,-0.525553,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
1,-0.334052,-0.094033,-0.117786,-0.096172,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0
2,-0.334052,-0.094033,-0.117786,-0.096172,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
3,0.135854,0.208707,0.228056,-0.525553,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
4,0.135854,0.208707,0.228056,-0.525553,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1


In [24]:
# Persist the combined feature table; the row index is written as the first CSV column.
X.to_csv('scaled_dummy_reduced_df.csv', index=True)

In [None]:
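#removed features: demographic variables unrelated to mobility, plus "Refused" / "Don't know" dummies (left out of categ above)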
# "qincome_$100,000 - $149,999","qincome_$15,000 - $24,999","qincome_$150,000-$199,999","qincome_$200,000 and above",
#            "qincome_$25,000 - $34,999","qincome_$35,000 - $49,999","qincome_$50,000 - $74,999","qincome_$75,000 - $99,999",
#            "qincome_Less than $14,999","qincome_Refused",

# "qracecode_Asian, Non-Hispanic",
#            "qracecode_Black, Non-Hispanic","qracecode_Don't know, Non-Hispanic","qracecode_Hispanic",
#            "qracecode_Other, Non-Hispanic","qracecode_White, Non-Hispanic","qeducation_Associate degree (i.e., AA, AS)",
#            "qeducation_Bachelor's degree (i.e., BA, BS, AB)","qeducation_Graduate degree (i.e., Master's, Professional, Doctorate)",
#            "qeducation_High school graduate or equivalent (i.e., GED)","qeducation_No high school",
#            "qeducation_Some college but degree not received or in progress","qeducation_Some high school",

# "qsmartphone_No","qsmartphone_Refused","qsmartphone_Yes",
#            "qshare1_No","qshare1_Yes","qshare2_No","qshare2_Yes","qshare3_No","qshare3_Yes","qshare4_No",
#            "qshare4_Yes","qshare5_No","qshare5_Yes","qshare6_No","qshare6_Yes","qshare7_No","qshare7_Yes",
#            "qshare8_No","qshare8_Yes",

# "qgender_Female","qgender_Male",
#            "qgender_Refused",

# "qday1typical_Don't know",
# "qborough_start_Don't know",
# "qborough_end_Don't know",
# "qday1triptravelcode_sp_Don't know",
# "qday1typical_Refused",
# "qborough_start_Refused",
# "qday1tripend_Refused",
# "qborough_end_Refused",
# "qlicense_Refused",
# "qday1tripendcode_Refused",
# "qpurposerecode_Refused",
# "qcitibike_Refused",
# "qday1triptravelcode_sp_Refused",
# "qcaraccess_Refused",

