In [499]:
##pip install sodapy
from sodapy import Socrata
import pandas as pd
import datetime

In [500]:
##we filter all rows where vehicle_type column equals 'Car Service'
##client.timeout = 1000000 -> the connection doesnt time out after default time (10 seconds)
##limit by 1000000
client = Socrata("data.cityofnewyork.us", None)
client.timeout = 1000000
query = "SELECT * WHERE vehicle_type = 'Car Service' LIMIT 1000000 "



In [501]:
results = client.get("erm2-nwe9", query=query)

In [502]:
df = pd.DataFrame.from_records(results)

In [503]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8541 entries, 0 to 8540
Data columns (total 41 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   unique_key                      8541 non-null   object
 1   created_date                    8541 non-null   object
 2   closed_date                     7330 non-null   object
 3   agency                          8541 non-null   object
 4   agency_name                     8541 non-null   object
 5   complaint_type                  8541 non-null   object
 6   descriptor                      8541 non-null   object
 7   incident_zip                    8088 non-null   object
 8   incident_address                4584 non-null   object
 9   street_name                     4584 non-null   object
 10  cross_street_1                  4846 non-null   object
 11  cross_street_2                  4842 non-null   object
 12  intersection_street_1           4867 non-null   

In [504]:
##choose relevant colu
df = df[['unique_key', 'created_date', 'agency', 'agency_name', 'complaint_type', 'descriptor', 'incident_zip', 
'incident_address', 'street_name', 'city', 'borough', 'taxi_company_borough', 'taxi_pick_up_location',
'latitude', 'longitude', 'location', 'location_type']]

In [505]:
##dropping null values from these columns as I'll change data type to number
df.dropna(subset=['latitude','longitude'], inplace=True)

##incident_zip has "N/A" value it needs to be classified as NaN first, then it can be dropped.
df['incident_zip'] = pd.to_numeric(df['incident_zip'], errors='coerce')
df.dropna(subset=['incident_zip'], inplace=True)

In [506]:
df.isnull().sum()

unique_key                  0
created_date                0
agency                      0
agency_name                 0
complaint_type              0
descriptor                  0
incident_zip                0
incident_address         3493
street_name              3493
city                      184
borough                     0
taxi_company_borough     2652
taxi_pick_up_location    6285
latitude                    0
longitude                   0
location                    0
location_type            1310
dtype: int64

In [507]:
##change data types
df['unique_key'] = df['unique_key'].astype('int')
df['latitude'] = df['latitude'].astype('float')
df['longitude'] = df['longitude'].astype('float')
df['incident_zip'] = df['incident_zip'].astype('int')

#convert date column to datetime 
df['created_date'] = pd.to_datetime(df['created_date'], 
                                    format='%Y-%m-%dT%H:%M:%S.%f')
df.dtypes

unique_key                        int64
created_date             datetime64[ns]
agency                           object
agency_name                      object
complaint_type                   object
descriptor                       object
incident_zip                      int64
incident_address                 object
street_name                      object
city                             object
borough                          object
taxi_company_borough             object
taxi_pick_up_location            object
latitude                        float64
longitude                       float64
location                         object
location_type                    object
dtype: object

In [508]:
df['year'] = df['created_date'].dt.year
df['month'] = df['created_date'].dt.month
df['day'] = df['created_date'].dt.dayofweek

In [509]:
df['agency'].unique()

array(['TLC'], dtype=object)

In [510]:
df['agency_name'].unique()

array(['Taxi and Limousine Commission'], dtype=object)

In [511]:
df['complaint_type'].unique()

array(['For Hire Vehicle Complaint', 'For Hire Vehicle Report',
       'Dispatched Taxi Complaint', 'Taxi Complaint',
       'FHV Licensee Complaint', 'Lost Property'], dtype=object)

In [512]:
df['descriptor'].unique()

array(['Car Service Company Complaint', 'Driver Complaint',
       'Driver Complaint - Passenger', 'Car Service Company Report',
       'Driver Report - Passenger', 'Driver Complaint - Non Passenger',
       'Insurance Information Requested', 'Driver Report - Non Passenger',
       'Driver Report', 'Equipment Complaint'], dtype=object)

In [513]:
df['incident_address'].unique()

array(['50 EAST   54 STREET', '65 PLACE', '291 SCHENECTADY AVENUE', ...,
       '19 EAST HOUSTON STREET', '2044 WESTCHESTER AVENUE',
       '245 WEST   17 STREET'], dtype=object)

In [514]:
df['street_name'].unique()

array(['EAST   54 STREET', '65 PLACE', 'SCHENECTADY AVENUE', ...,
       'DAFFODIL LANE', '49 AVENUE', 'WEST   17 STREET'], dtype=object)

In [515]:
df['city'].unique()

array(['NEW YORK', nan, 'BROOKLYN', 'JAMAICA', 'JACKSON HEIGHTS', 'BRONX',
       'RIDGEWOOD', 'LONG ISLAND CITY', 'REGO PARK', 'ASTORIA', 'CORONA',
       'STATEN ISLAND', 'EAST ELMHURST', 'SUNNYSIDE', 'ROSEDALE',
       'WOODSIDE', 'FLUSHING', 'FAR ROCKAWAY', 'WOODHAVEN',
       'SOUTH OZONE PARK', 'RICHMOND HILL', 'ELMHURST',
       'SPRINGFIELD GARDENS', 'OZONE PARK', 'LITTLE NECK',
       'FRESH MEADOWS', 'SOUTH RICHMOND HILL', 'SAINT ALBANS',
       'QUEENS VILLAGE', 'MASPETH', 'HOWARD BEACH', 'CAMBRIA HEIGHTS',
       'ARVERNE', 'HOLLIS', 'FOREST HILLS', 'BAYSIDE', 'MIDDLE VILLAGE',
       'GLEN OAKS', 'OAKLAND GARDENS', 'BELLEROSE', 'KEW GARDENS',
       'ROCKAWAY PARK', 'WHITESTONE', 'COLLEGE POINT', 'QUEENS',
       'UNKNOWN', 'NEW HYDE PARK', 'FLORAL PARK'], dtype=object)

In [516]:
df['borough'].unique()

array(['MANHATTAN', 'QUEENS', 'BROOKLYN', 'BRONX', 'STATEN ISLAND',
       'Unspecified'], dtype=object)

In [517]:
df['taxi_company_borough'].unique()

array(['MANHATTAN', 'QUEENS', 'BROOKLYN', nan, 'BRONX', 'STATEN ISLAND'],
      dtype=object)

In [518]:
df['taxi_pick_up_location'].unique()

array(['50 EAST   54 STREET, MANHATTAN (NEW YORK), NY, 10022',
       '65 PLACE AND METROPOLITAN AVENUE, QUEENS, NY, 11379',
       '291 SCHENECTADY AVENUE, BROOKLYN, NY, 11213', ...,
       '19 EAST HOUSTON STREET, MANHATTAN (NEW YORK), NY, 10012',
       '2044 WESTCHESTER AVENUE, BRONX, NY, 10462',
       '245 WEST   17 STREET, MANHATTAN (NEW YORK), NY, 10011'],
      dtype=object)

In [519]:
df['location_type'].unique()

array([nan, 'Highway', 'Street', 'Roadway Tunnel', 'Bridge', 'Taxi'],
      dtype=object)

In [520]:
##filtering from 2015 to 2021, and creating a new df called "df311"
startdate=2015
enddate=2021
df311 = df[(df['year']>= startdate) & (df['year']< enddate)]
df311.reset_index()

Unnamed: 0,index,unique_key,created_date,agency,agency_name,complaint_type,descriptor,incident_zip,incident_address,street_name,...,borough,taxi_company_borough,taxi_pick_up_location,latitude,longitude,location,location_type,year,month,day
0,0,45245054,2019-12-25 16:42:37,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,10022,50 EAST 54 STREET,EAST 54 STREET,...,MANHATTAN,MANHATTAN,"50 EAST 54 STREET, MANHATTAN (NEW YORK), NY,...",40.760172,-73.973331,"{'latitude': '40.76017203609217', 'longitude':...",,2019,12,2
1,1,45246167,2019-12-26 11:12:01,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,11379,65 PLACE,65 PLACE,...,QUEENS,QUEENS,"65 PLACE AND METROPOLITAN AVENUE, QUEENS, NY, ...",40.712478,-73.895025,"{'latitude': '40.712478350483835', 'longitude'...",,2019,12,3
2,2,45247198,2019-12-26 19:15:21,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,11213,291 SCHENECTADY AVENUE,SCHENECTADY AVENUE,...,BROOKLYN,BROOKLYN,"291 SCHENECTADY AVENUE, BROOKLYN, NY, 11213",40.668089,-73.933961,"{'latitude': '40.668088651233106', 'longitude'...",,2019,12,3
3,3,45248682,2019-12-26 14:42:45,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,11432,173-21 JAMAICA AVENUE,JAMAICA AVENUE,...,QUEENS,QUEENS,"173-21 JAMAICA AVENUE, QUEENS (JAMAICA), NY, 1...",40.707914,-73.786418,"{'latitude': '40.707914490470344', 'longitude'...",,2019,12,3
4,4,45249480,2019-12-26 23:01:26,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,10007,191 BROADWAY,BROADWAY,...,MANHATTAN,MANHATTAN,"191 BROADWAY, MANHATTAN (NEW YORK), NY, 10007",40.710374,-74.009519,"{'latitude': '40.710373925407936', 'longitude'...",,2019,12,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1286,8457,46773529,2020-07-06 22:10:46,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,10302,1616 CASTLETON AVENUE,CASTLETON AVENUE,...,STATEN ISLAND,STATEN ISLAND,"1616 CASTLETON AVENUE, STATEN IS (STATEN ISLAN...",40.636082,-74.137083,"{'latitude': '40.636082117053114', 'longitude'...",,2020,7,0
1287,8458,46836329,2020-07-12 15:06:19,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,11692,354 BEACH 56 STREET,BEACH 56 STREET,...,QUEENS,BRONX,"354 BEACH 56 STREET, QUEENS (ARVERNE), NY, 1...",40.593788,-73.786142,"{'latitude': '40.59378834000442', 'longitude':...",,2020,7,6
1288,8459,47037225,2020-07-30 15:30:24,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,10036,10 AVENUE,10 AVENUE,...,MANHATTAN,MANHATTAN,"10 AVENUE AND WEST 45 STREET, MANHATTAN, NY,...",40.761556,-73.994069,"{'latitude': '40.761555576126604', 'longitude'...",,2020,7,3
1289,8460,47104021,2020-08-04 11:24:47,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,11101,21-24 44 AVENUE,44 AVENUE,...,QUEENS,,"21-24 44 AVENUE, QUEENS (LONG ISLAND CITY), NY...",40.749420,-73.946282,"{'latitude': '40.74941966159945', 'longitude':...",,2020,8,1


In [521]:
print(min(df311['created_date']))
print(max(df311['created_date']))

2015-01-05 16:23:31
2020-12-31 09:28:13


In [522]:
df311.head()

Unnamed: 0,unique_key,created_date,agency,agency_name,complaint_type,descriptor,incident_zip,incident_address,street_name,city,borough,taxi_company_borough,taxi_pick_up_location,latitude,longitude,location,location_type,year,month,day
0,45245054,2019-12-25 16:42:37,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,10022,50 EAST 54 STREET,EAST 54 STREET,NEW YORK,MANHATTAN,MANHATTAN,"50 EAST 54 STREET, MANHATTAN (NEW YORK), NY,...",40.760172,-73.973331,"{'latitude': '40.76017203609217', 'longitude':...",,2019,12,2
1,45246167,2019-12-26 11:12:01,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,11379,65 PLACE,65 PLACE,,QUEENS,QUEENS,"65 PLACE AND METROPOLITAN AVENUE, QUEENS, NY, ...",40.712478,-73.895025,"{'latitude': '40.712478350483835', 'longitude'...",,2019,12,3
2,45247198,2019-12-26 19:15:21,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,11213,291 SCHENECTADY AVENUE,SCHENECTADY AVENUE,BROOKLYN,BROOKLYN,BROOKLYN,"291 SCHENECTADY AVENUE, BROOKLYN, NY, 11213",40.668089,-73.933961,"{'latitude': '40.668088651233106', 'longitude'...",,2019,12,3
3,45248682,2019-12-26 14:42:45,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,11432,173-21 JAMAICA AVENUE,JAMAICA AVENUE,JAMAICA,QUEENS,QUEENS,"173-21 JAMAICA AVENUE, QUEENS (JAMAICA), NY, 1...",40.707914,-73.786418,"{'latitude': '40.707914490470344', 'longitude'...",,2019,12,3
4,45249480,2019-12-26 23:01:26,TLC,Taxi and Limousine Commission,For Hire Vehicle Complaint,Car Service Company Complaint,10007,191 BROADWAY,BROADWAY,NEW YORK,MANHATTAN,MANHATTAN,"191 BROADWAY, MANHATTAN (NEW YORK), NY, 10007",40.710374,-74.009519,"{'latitude': '40.710373925407936', 'longitude'...",,2019,12,3


In [523]:
df311.to_csv ('311_Service_Request_TLC.csv', index = False, header=True)