In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV, train_test_split

from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from imblearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import classification_report

import datetime as dt

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [2]:
train = pd.read_csv('data_train.csv')
alerts = pd.read_csv('alerts.csv')
irreg = pd.read_csv('irregularities.csv')

# Preprocessing: Alerts

In [3]:
train.head()

Unnamed: 0,Ids,Labels
0,2e69e9384_2020-10-06_13,True
1,2e6992c7c_2020-10-02_17,True
2,2e69ef474_2020-09-13_19,True
3,2e69c5fd4_2020-10-10_15,True
4,2e6992134_2020-09-12_11,True


In [4]:
train.tail()

Unnamed: 0,Ids,Labels
71331,2e69eea5c_2020-11-09_10,False
71332,2e69c5944_2020-10-27_12,True
71333,2e69f2cd4_2020-11-07_14,True
71334,2e68e64e4_2020-09-23_9,False
71335,2e68e6084_2020-10-18_12,False


In [5]:
alerts.head()

Unnamed: 0,id,pub_millis,s2id_15,s2token_15,road_type,street,city,magvar,reliability,report_description,report_rating,confidence,type,subtype,report_by_municipality_user,n_thumbs_up,longitude,latitude
0,177876895,1603331480000,3344466888162803712,2e69eeea4,1,,Depok,0,9,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.788545,-6.359846
1,179156987,1604487892000,3344463130066419712,2e69eb7f4,6,N8 Jalan Raya Bogor,Depok,170,7,,1,1,JAM,JAM_HEAVY_TRAFFIC,,0.0,106.867141,-6.383855
2,181688703,1605666614000,3344367648648462336,2e6994a84,7,,Bekasi,0,8,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.921974,-6.379087
3,173055165,1601895721000,3344374458319110144,2e699ad9c,2,Flyover Tegal Gede,Cikarang,319,10,,5,0,WEATHERHAZARD,HAZARD_ON_ROAD_POT_HOLE,,,107.143656,-6.300441
4,173802602,1602464394000,3344466709921660928,2e69eec0c,2,Tanjakan Kembar,Depok,310,5,,3,0,JAM,JAM_HEAVY_TRAFFIC,,,106.79395,-6.365677


## Change millisecond to Date and Time

In [6]:
alerts['pub_date'] = [dt.datetime.fromtimestamp(i/1000.0) for i in alerts['pub_millis']]
alerts['pub_date'].head()

0   2020-10-22 08:51:20
1   2020-11-04 18:04:52
2   2020-11-18 09:30:14
3   2020-10-05 18:02:01
4   2020-10-12 07:59:54
Name: pub_date, dtype: datetime64[ns]

In [7]:
alerts.head()

Unnamed: 0,id,pub_millis,s2id_15,s2token_15,road_type,street,city,magvar,reliability,report_description,report_rating,confidence,type,subtype,report_by_municipality_user,n_thumbs_up,longitude,latitude,pub_date
0,177876895,1603331480000,3344466888162803712,2e69eeea4,1,,Depok,0,9,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.788545,-6.359846,2020-10-22 08:51:20
1,179156987,1604487892000,3344463130066419712,2e69eb7f4,6,N8 Jalan Raya Bogor,Depok,170,7,,1,1,JAM,JAM_HEAVY_TRAFFIC,,0.0,106.867141,-6.383855,2020-11-04 18:04:52
2,181688703,1605666614000,3344367648648462336,2e6994a84,7,,Bekasi,0,8,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.921974,-6.379087,2020-11-18 09:30:14
3,173055165,1601895721000,3344374458319110144,2e699ad9c,2,Flyover Tegal Gede,Cikarang,319,10,,5,0,WEATHERHAZARD,HAZARD_ON_ROAD_POT_HOLE,,,107.143656,-6.300441,2020-10-05 18:02:01
4,173802602,1602464394000,3344466709921660928,2e69eec0c,2,Tanjakan Kembar,Depok,310,5,,3,0,JAM,JAM_HEAVY_TRAFFIC,,,106.79395,-6.365677,2020-10-12 07:59:54


In [8]:
alerts['pub_date'].head(3)

0   2020-10-22 08:51:20
1   2020-11-04 18:04:52
2   2020-11-18 09:30:14
Name: pub_date, dtype: datetime64[ns]

## Split `dates` and `hour` into Two New Columns

In [9]:
alerts['dates'] = [i.strftime('%Y-%m-%d') for i in alerts['pub_date']]

In [10]:
alerts['hour'] = [str(i.hour) for i in alerts['pub_date']]

In [11]:
alerts.head()

Unnamed: 0,id,pub_millis,s2id_15,s2token_15,road_type,street,city,magvar,reliability,report_description,report_rating,confidence,type,subtype,report_by_municipality_user,n_thumbs_up,longitude,latitude,pub_date,dates,hour
0,177876895,1603331480000,3344466888162803712,2e69eeea4,1,,Depok,0,9,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.788545,-6.359846,2020-10-22 08:51:20,2020-10-22,8
1,179156987,1604487892000,3344463130066419712,2e69eb7f4,6,N8 Jalan Raya Bogor,Depok,170,7,,1,1,JAM,JAM_HEAVY_TRAFFIC,,0.0,106.867141,-6.383855,2020-11-04 18:04:52,2020-11-04,18
2,181688703,1605666614000,3344367648648462336,2e6994a84,7,,Bekasi,0,8,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.921974,-6.379087,2020-11-18 09:30:14,2020-11-18,9
3,173055165,1601895721000,3344374458319110144,2e699ad9c,2,Flyover Tegal Gede,Cikarang,319,10,,5,0,WEATHERHAZARD,HAZARD_ON_ROAD_POT_HOLE,,,107.143656,-6.300441,2020-10-05 18:02:01,2020-10-05,18
4,173802602,1602464394000,3344466709921660928,2e69eec0c,2,Tanjakan Kembar,Depok,310,5,,3,0,JAM,JAM_HEAVY_TRAFFIC,,,106.79395,-6.365677,2020-10-12 07:59:54,2020-10-12,7


## Combine `s2token_15`, `dates` and `hour` into ids for Merging with Train DF

In [12]:
alerts['Ids'] = alerts['s2token_15']+'_'+alerts['dates']+'_'+alerts['hour']

In [13]:
alerts.head()

Unnamed: 0,id,pub_millis,s2id_15,s2token_15,road_type,street,city,magvar,reliability,report_description,report_rating,confidence,type,subtype,report_by_municipality_user,n_thumbs_up,longitude,latitude,pub_date,dates,hour,Ids
0,177876895,1603331480000,3344466888162803712,2e69eeea4,1,,Depok,0,9,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.788545,-6.359846,2020-10-22 08:51:20,2020-10-22,8,2e69eeea4_2020-10-22_8
1,179156987,1604487892000,3344463130066419712,2e69eb7f4,6,N8 Jalan Raya Bogor,Depok,170,7,,1,1,JAM,JAM_HEAVY_TRAFFIC,,0.0,106.867141,-6.383855,2020-11-04 18:04:52,2020-11-04,18,2e69eb7f4_2020-11-04_18
2,181688703,1605666614000,3344367648648462336,2e6994a84,7,,Bekasi,0,8,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.921974,-6.379087,2020-11-18 09:30:14,2020-11-18,9,2e6994a84_2020-11-18_9
3,173055165,1601895721000,3344374458319110144,2e699ad9c,2,Flyover Tegal Gede,Cikarang,319,10,,5,0,WEATHERHAZARD,HAZARD_ON_ROAD_POT_HOLE,,,107.143656,-6.300441,2020-10-05 18:02:01,2020-10-05,18,2e699ad9c_2020-10-05_18
4,173802602,1602464394000,3344466709921660928,2e69eec0c,2,Tanjakan Kembar,Depok,310,5,,3,0,JAM,JAM_HEAVY_TRAFFIC,,,106.79395,-6.365677,2020-10-12 07:59:54,2020-10-12,7,2e69eec0c_2020-10-12_7


## Make Function to Simplifies The Proccess

In [14]:
def make_ids(df, col_millis, col_token):
    df['pub_date'] = [dt.datetime.fromtimestamp(i/1000.0) for i in df[col_millis]]
    df['dates'] = [i.strftime('%Y-%m-%d') for i in df['pub_date']]
    df['hour'] = [str(i.hour) for i in df['pub_date']]
    df['Ids'] = df[col_token]+'_'+df['dates']+'_'+df['hour']
    return df

# Preprocessing `irregularities`

In [15]:
irreg.head()

Unnamed: 0,id,detection_date_millis,update_date_millis,street,city,is_highway,line,s2id_center,s2token_center,speed,regular_speed,delay_seconds,seconds,length,trend,type,severity,jam_level,drivers_count,alerts_count,n_thumbs_up
0,12868069,1604733149024,1604735467276,Jatiwaringin Raya,Bekasi,t,"{""line"": [{""x"": 106.91014, ""y"": -6.258107}, {""...",3344471185277583360,2e69f2d2c,13.03,17.15,299,432,1566,0,Small,5,3,13,0,0
1,12420463,1599906813144,1599909295834,Putri Tunggal,Depok,f,"{""line"": [{""x"": 106.887821, ""y"": -6.377016}, {...",3344462996922433536,2e69eb604,6.56,18.0,399,539,984,0,Small,5,3,5,0,0
2,12497533,1601728355356,1601734996933,Ir Haji Juanda,Bandung,f,"{""line"": [{""x"": 107.618629, ""y"": -6.87556}, {""...",3344176694402482176,2e68e6fc4,3.36,19.65,1185,1294,1212,1,Large,5,4,21,2,0
3,12536831,1602312860279,1602315706305,KH Muchtar Tabrani,Bekasi,f,"{""line"": [{""x"": 107.002934, ""y"": -6.216088}, {...",3344358143885836288,2e698c034,4.36,8.04,467,543,659,-1,Small,5,4,3,0,0
4,12327151,1598956623240,1598957378934,N1 Pangeran Diponegoro,Tambun Selatan,t,"{""line"": [{""x"": 107.035652, ""y"": -6.255471}, {...",3344360723013697536,2e698e5bc,4.74,16.71,423,474,625,0,Small,5,4,11,0,0


In [16]:
irreg_up = make_ids(irreg, 'update_date_millis', 's2token_center')

In [17]:
irreg_det = make_ids(irreg, 'detection_date_millis', 's2token_center')

# Joining

In [24]:
test = pd.read_csv('data_test.csv')
test.shape

(13841, 1)

In [25]:
train.shape

(71336, 2)

In [28]:
alerts.shape

(7800662, 22)

In [26]:
display(train.head(1))
display(test.head(1))
display(alerts.head(1))
display(irreg_up.head(1))

Unnamed: 0,Ids,Labels
0,2e69e9384_2020-10-06_13,True


Unnamed: 0,Ids
0,2e6992a84_2020-11-25_18


Unnamed: 0,id,pub_millis,s2id_15,s2token_15,road_type,street,city,magvar,reliability,report_description,report_rating,confidence,type,subtype,report_by_municipality_user,n_thumbs_up,longitude,latitude,pub_date,dates,hour,Ids
0,177876895,1603331480000,3344466888162803712,2e69eeea4,1,,Depok,0,9,,0,1,ROAD_CLOSED,ROAD_CLOSED_EVENT,,,106.788545,-6.359846,2020-10-22 08:51:20,2020-10-22,8,2e69eeea4_2020-10-22_8


Unnamed: 0,id,detection_date_millis,update_date_millis,street,city,is_highway,line,s2id_center,s2token_center,speed,regular_speed,delay_seconds,seconds,length,trend,type,severity,jam_level,drivers_count,alerts_count,n_thumbs_up,pub_date,dates,hour,Ids
0,12868069,1604733149024,1604735467276,Jatiwaringin Raya,Bekasi,t,"{""line"": [{""x"": 106.91014, ""y"": -6.258107}, {""...",3344471185277583360,2e69f2d2c,13.03,17.15,299,432,1566,0,Small,5,3,13,0,0,2020-11-07 14:12:29.024,2020-11-07,14,2e69f2d2c_2020-11-07_14


In [27]:
merge_train_alerts = pd.merge(train, alerts, on=['Ids'])

In [30]:
data = pd.merge(merge_train_alerts, irreg_up, on=['Ids'])

In [31]:
data.shape

(3707448, 47)

In [38]:
data.head()

Unnamed: 0,Ids,Labels,id_x,pub_millis,s2id_15,s2token_15,road_type,street_x,city_x,magvar,reliability,report_description,report_rating,confidence,type_x,subtype,report_by_municipality_user,n_thumbs_up_x,longitude,latitude,pub_date_x,dates_x,hour_x,id_y,detection_date_millis,update_date_millis,street_y,city_y,is_highway,line,s2id_center,s2token_center,speed,regular_speed,delay_seconds,seconds,length,trend,type_y,severity,jam_level,drivers_count,alerts_count,n_thumbs_up_y,pub_date_y,dates_y,hour_y
0,2e698e484_2020-09-25_10,True,170560751,1601003424000,3344360639261835264,2e698e484,2,Lambang Jaya,Tambun Selatan,109,6,,2,0,JAM,JAM_STAND_STILL_TRAFFIC,,0.0,107.044226,-6.270431,2020-09-25 10:10:24,2020-09-25,10,12454338,1601005939233,1601008279450,Lambang Jaya,Tambun Selatan,f,"{""line"": [{""x"": 107.040272, ""y"": -6.268992}, {...",3344360639261835264,2e698e484,6.75,25.63,362,440,827,1,Small,5,4,7,0,0,2020-09-25 10:52:19.233,2020-09-25,10
1,2e698e484_2020-09-25_10,True,170560751,1601003424000,3344360639261835264,2e698e484,2,Lambang Jaya,Tambun Selatan,109,6,,2,0,JAM,JAM_STAND_STILL_TRAFFIC,,0.0,107.044226,-6.270431,2020-09-25 10:10:24,2020-09-25,10,12454321,1601005939233,1601007955894,Lambang Jaya,Tambun Selatan,f,"{""line"": [{""x"": 107.03547, ""y"": -6.267191}, {""...",3344360639261835264,2e698e484,8.01,30.29,500,625,1394,1,Medium,5,4,7,0,0,2020-09-25 10:52:19.233,2020-09-25,10
2,2e698e484_2020-09-25_10,True,170560751,1601003424000,3344360639261835264,2e698e484,2,Lambang Jaya,Tambun Selatan,109,6,,2,0,JAM,JAM_STAND_STILL_TRAFFIC,,0.0,107.044226,-6.270431,2020-09-25 10:10:24,2020-09-25,10,12454341,1601005939233,1601008357781,Lambang Jaya,Tambun Selatan,f,"{""line"": [{""x"": 107.040272, ""y"": -6.268992}, {...",3344360639261835264,2e698e484,6.78,25.61,360,438,827,0,Small,5,4,6,0,0,2020-09-25 10:52:19.233,2020-09-25,10
3,2e698e484_2020-09-25_10,True,170560751,1601003424000,3344360639261835264,2e698e484,2,Lambang Jaya,Tambun Selatan,109,6,,2,0,JAM,JAM_STAND_STILL_TRAFFIC,,0.0,107.044226,-6.270431,2020-09-25 10:10:24,2020-09-25,10,12454315,1601005939233,1601007798478,Lambang Jaya,Tambun Selatan,f,"{""line"": [{""x"": 107.03547, ""y"": -6.267191}, {""...",3344360639261835264,2e698e484,9.14,30.45,423,548,1394,-1,Small,5,3,7,0,0,2020-09-25 10:52:19.233,2020-09-25,10
4,2e698e484_2020-09-25_10,True,170560751,1601003424000,3344360639261835264,2e698e484,2,Lambang Jaya,Tambun Selatan,109,6,,2,0,JAM,JAM_STAND_STILL_TRAFFIC,,0.0,107.044226,-6.270431,2020-09-25 10:10:24,2020-09-25,10,12454330,1601005939233,1601008114231,Lambang Jaya,Tambun Selatan,f,"{""line"": [{""x"": 107.040272, ""y"": -6.268992}, {...",3344360639261835264,2e698e484,8.12,26.33,288,366,827,0,Small,5,4,7,0,0,2020-09-25 10:52:19.233,2020-09-25,10


## EDA

In [42]:
data.head(1)

Unnamed: 0,Ids,Labels,id_x,pub_millis,s2id_15,s2token_15,road_type,street_x,city_x,magvar,reliability,report_description,report_rating,confidence,type_x,subtype,report_by_municipality_user,n_thumbs_up_x,longitude,latitude,pub_date_x,dates_x,hour_x,id_y,detection_date_millis,update_date_millis,street_y,city_y,is_highway,line,s2id_center,s2token_center,speed,regular_speed,delay_seconds,seconds,length,trend,type_y,severity,jam_level,drivers_count,alerts_count,n_thumbs_up_y,pub_date_y,dates_y,hour_y
0,2e698e484_2020-09-25_10,True,170560751,1601003424000,3344360639261835264,2e698e484,2,Lambang Jaya,Tambun Selatan,109,6,,2,0,JAM,JAM_STAND_STILL_TRAFFIC,,0.0,107.044226,-6.270431,2020-09-25 10:10:24,2020-09-25,10,12454338,1601005939233,1601008279450,Lambang Jaya,Tambun Selatan,f,"{""line"": [{""x"": 107.040272, ""y"": -6.268992}, {...",3344360639261835264,2e698e484,6.75,25.63,362,440,827,1,Small,5,4,7,0,0,2020-09-25 10:52:19.233,2020-09-25,10


In [40]:
(data.isna().sum()/len(data)*100).sort_values(ascending=False).head(10)

report_description             100.000000
report_by_municipality_user    100.000000
n_thumbs_up_x                   76.760591
subtype                          4.640524
street_x                         3.114649
street_y                         0.900458
report_rating                    0.000000
pub_date_x                       0.000000
latitude                         0.000000
longitude                        0.000000
dtype: float64

**Note**: We can delete columns that contain a lot of NaN values. The following is also the column that has no relationship with the target variable.

In [39]:
data.describe()

Unnamed: 0,id_x,pub_millis,s2id_15,road_type,magvar,reliability,report_description,report_rating,confidence,report_by_municipality_user,n_thumbs_up_x,longitude,latitude,id_y,detection_date_millis,update_date_millis,s2id_center,speed,regular_speed,delay_seconds,seconds,length,trend,severity,jam_level,drivers_count,alerts_count,n_thumbs_up_y
count,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,0.0,3707448.0,3707448.0,0.0,861589.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0,3707448.0
mean,176434000.0,1603416000000.0,3.34433e+18,4.891745,162.5304,5.642938,,1.759216,0.1692776,,0.0,107.1354,-6.540876,12697840.0,1603416000000.0,1603419000000.0,3.34433e+18,6.366959,20.78335,661.1974,781.7901,1265.223,-0.1781204,5.0,3.759645,10.97375,0.9342653,0.329353
std,3912717.0,1660056000.0,114845400000000.0,2.581285,118.1746,1.024156,,1.389461,0.5446661,,0.0,0.347385,0.2697997,181540.9,1659972000.0,1660245000.0,114845400000000.0,3.198366,10.68939,382.3192,405.388,740.0426,0.7858336,0.0,0.4440751,8.998078,1.438933,1.058309
min,166321100.0,1598920000000.0,3.344137e+18,1.0,0.0,5.0,,0.0,0.0,,0.0,106.7219,-6.961995,12323880.0,1598920000000.0,1598921000000.0,3.344137e+18,0.57,3.75,69.0,120.0,500.0,-1.0,5.0,1.0,1.0,0.0,0.0
25%,173516400.0,1602319000000.0,3.344178e+18,2.0,54.0,5.0,,1.0,0.0,,0.0,106.9011,-6.880992,12540330.0,1602319000000.0,1602321000000.0,3.344178e+18,4.0,14.89,445.0,538.0,716.0,-1.0,5.0,4.0,5.0,0.0,0.0
50%,177613000.0,1603854000000.0,3.344361e+18,6.0,166.0,5.0,,2.0,0.0,,0.0,106.9605,-6.395953,12687450.0,1603857000000.0,1603859000000.0,3.344361e+18,5.65,19.21,583.0,692.0,1027.0,0.0,5.0,4.0,9.0,0.0,0.0
75%,179479100.0,1604660000000.0,3.344425e+18,7.0,267.0,6.0,,3.0,0.0,,0.0,107.6023,-6.308303,12855820.0,1604661000000.0,1604663000000.0,3.344425e+18,7.96,24.87,775.0,925.0,1542.0,0.0,5.0,4.0,14.0,1.0,0.0
max,182155200.0,1606042000000.0,3.344471e+18,22.0,359.0,10.0,,5.0,5.0,,0.0,107.7158,-6.160888,13060290.0,1606042000000.0,1606048000000.0,3.344471e+18,28.65,1832.77,7053.0,5217.0,6191.0,1.0,5.0,4.0,100.0,15.0,25.0


In [43]:
# !pip install s2cell

Collecting s2cell
  Downloading s2cell-1.4.0-py3-none-any.whl (17 kB)
Installing collected packages: s2cell
Successfully installed s2cell-1.4.0


In [45]:
import s2cell

In [49]:
coor = [s2cell.token_to_lat_lon(i) for i in data['s2token_15']]

In [59]:
coor = np.array(coor)

In [60]:
data['lat'] = coor[:,0]
data['lon'] = coor[:,1]

In [61]:
data.head(1)

Unnamed: 0,Ids,Labels,id_x,pub_millis,s2id_15,s2token_15,road_type,street_x,city_x,magvar,reliability,report_description,report_rating,confidence,type_x,subtype,report_by_municipality_user,n_thumbs_up_x,longitude,latitude,pub_date_x,dates_x,hour_x,id_y,detection_date_millis,update_date_millis,street_y,city_y,is_highway,line,s2id_center,s2token_center,speed,regular_speed,delay_seconds,seconds,length,trend,type_y,severity,jam_level,drivers_count,alerts_count,n_thumbs_up_y,pub_date_y,dates_y,hour_y,lat,lon
0,2e698e484_2020-09-25_10,True,170560751,1601003424000,3344360639261835264,2e698e484,2,Lambang Jaya,Tambun Selatan,109,6,,2,0,JAM,JAM_STAND_STILL_TRAFFIC,,0.0,107.044226,-6.270431,2020-09-25 10:10:24,2020-09-25,10,12454338,1601005939233,1601008279450,Lambang Jaya,Tambun Selatan,f,"{""line"": [{""x"": 107.040272, ""y"": -6.268992}, {...",3344360639261835264,2e698e484,6.75,25.63,362,440,827,1,Small,5,4,7,0,0,2020-09-25 10:52:19.233,2020-09-25,10,-6.270088,107.043609


In [68]:
data['day'] = data['pub_date_y'].dt.day
data['month'] = data['pub_date_y'].dt.month

In [69]:
data.head(1)

Unnamed: 0,Ids,Labels,id_x,pub_millis,s2id_15,s2token_15,road_type,street_x,city_x,magvar,reliability,report_description,report_rating,confidence,type_x,subtype,report_by_municipality_user,n_thumbs_up_x,longitude,latitude,pub_date_x,dates_x,hour_x,id_y,detection_date_millis,update_date_millis,street_y,city_y,is_highway,line,s2id_center,s2token_center,speed,regular_speed,delay_seconds,seconds,length,trend,type_y,severity,jam_level,drivers_count,alerts_count,n_thumbs_up_y,pub_date_y,dates_y,hour_y,lat,lon,day,month
0,2e698e484_2020-09-25_10,True,170560751,1601003424000,3344360639261835264,2e698e484,2,Lambang Jaya,Tambun Selatan,109,6,,2,0,JAM,JAM_STAND_STILL_TRAFFIC,,0.0,107.044226,-6.270431,2020-09-25 10:10:24,2020-09-25,10,12454338,1601005939233,1601008279450,Lambang Jaya,Tambun Selatan,f,"{""line"": [{""x"": 107.040272, ""y"": -6.268992}, {...",3344360639261835264,2e698e484,6.75,25.63,362,440,827,1,Small,5,4,7,0,0,2020-09-25 10:52:19.233,2020-09-25,10,-6.270088,107.043609,25,9


In [73]:
ready = data[['lat', 'lon', 'day', 'month', 'Labels']]
ready

Unnamed: 0,lat,lon,day,month,Labels
0,-6.270088,107.043609,25,9,True
1,-6.270088,107.043609,25,9,True
2,-6.270088,107.043609,25,9,True
3,-6.270088,107.043609,25,9,True
4,-6.270088,107.043609,25,9,True
...,...,...,...,...,...
3707443,-6.886813,107.581583,22,11,True
3707444,-6.886813,107.581583,22,11,True
3707445,-6.886813,107.581583,22,11,True
3707446,-6.886813,107.581583,22,11,True


In [None]:
ready.to_csv('ready.csv')