In [1]:
import numpy as np
import pandas as pd
from openpyxl import load_workbook
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning,
# deprecation notices) -- confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

In [2]:
# Read columns A-H of the 'SEWAATM' sheet; header=6 takes the column labels
# from the row at 0-based position 6.
data = pd.read_excel(
    'data-set.xlsx',
    sheet_name='SEWAATM',
    usecols='A:H',
    header=6,
)

In [3]:
# Keep only the first 1247 rows that were read from the sheet.
# .copy() makes `df` an independent frame, so the in-place edits in the
# following cells never operate on a slice of `data`.
df = data.iloc[0:1247].copy()

Kolom `Tagihan` tidak dimasukkan karena terlalu abstrak. Jika diberikan waktu lebih, kolom tersebut dapat ikut diolah. Untuk saat ini, data hanya dibaca sampai kolom `H`.

In [4]:
# Shift every row label up by one.
# Note: re-running this cell shifts the labels again.
df.index = df.index + 1

In [5]:
# Remove the 'Unnamed: 0' column (presumably sheet column A -- confirm).
df.drop(columns=['Unnamed: 0'], inplace=True)

Nama kolom akan dibuat menjadi huruf kecil, dan value pada kolom teks `nama_lokasi` dan `kota` juga diubah menjadi huruf kecil (`lower()`) untuk alasan kesetaraan.

In [6]:
# Map the placeholder headers 'Unnamed: 1' .. 'Unnamed: 7' to descriptive
# lower-case column names, in sheet order.
col_dict = {
    f'Unnamed: {position}': new_name
    for position, new_name in enumerate(
        [
            'tanggal_request',
            'nama_lokasi',
            'kota',
            'masa_sewa_(tahun)',
            'delivery',
            'tipe',
            'jatuh_tempo',
        ],
        start=1,
    )
}

In [7]:
# Apply the header mapping to the column labels, modifying `df` in place.
df.rename(col_dict, axis='columns', inplace=True)

In [8]:
# Lower-case the two free-text columns so equal names compare equal.
for text_col in ('nama_lokasi', 'kota'):
    df[text_col] = df[text_col].str.lower()

In [9]:
# Preview the first rows after renaming and lower-casing.
df.head()

Unnamed: 0,tanggal_request,nama_lokasi,kota,masa_sewa_(tahun),delivery,tipe,jatuh_tempo
1,2010-09-21 00:00:00,pln kota,jkt-hub ii rpc,,2010-10-07 00:00:00,522,2013-10-01 00:00:00
2,2010-09-21 00:00:00,perumnas,jkt-hub i rpc,,2010-10-07 00:00:00,522,2013-10-01 00:00:00
3,2010-09-21 00:00:00,tip top depok,jkt-hub viii rpc,,2010-10-07 00:00:00,522,2013-10-01 00:00:00
4,2010-09-21 00:00:00,tip top pdk bambu,jkt-hub vi rpc,,2010-10-07 00:00:00,522,2013-10-01 00:00:00
5,2010-09-21 00:00:00,tip top rawamangun,jkt-hub vii,,2010-10-07 00:00:00,529EPP7,2013-10-01 00:00:00


In [10]:
# (rows, columns) before any missing-value handling.
df.shape

(1247, 7)

In [11]:
# Number of missing values in each column.
df.isna().sum()

tanggal_request       75
nama_lokasi            0
kota                   0
masa_sewa_(tahun)    826
delivery             162
tipe                 106
jatuh_tempo          175
dtype: int64

Kolom `masa_sewa_(tahun)` memiliki terlalu banyak missing value, sehingga kolom ini tidak akan dipakai. Untuk kolom lainnya, baris yang mengandung missing value akan di-drop.

In [12]:
# Discard the lease-duration column entirely.
df.drop(columns=['masa_sewa_(tahun)'], inplace=True)

In [13]:
# Remove every row that has a missing value in any remaining column.
df.dropna(axis='index', how='any', inplace=True)

In [14]:
# Confirm that no missing values remain after the drop.
df.isna().sum()

tanggal_request    0
nama_lokasi        0
kota               0
delivery           0
tipe               0
jatuh_tempo        0
dtype: int64

In [15]:
# (rows, columns) after dropping incomplete rows.
df.shape

(988, 6)

Datetime `tanggal_request`, `jatuh_tempo`, dan `delivery`

In [16]:
# Render each date column as text and keep only its first 10 characters
# (the 'YYYY-MM-DD' part of a timestamp string).
for date_col in ('tanggal_request', 'jatuh_tempo', 'delivery'):
    df[date_col] = df[date_col].astype(str).str.slice(stop=10)

In [17]:
# Disabled: would turn the literal text 'nan' (what astype(str) yields for a
# missing value) back into a real NaN.
# NOTE(review): likely unnecessary, because incomplete rows are dropped before
# the string conversion -- confirm, then delete this cell.
# df['tanggal_request'] = df['tanggal_request'].replace('nan', np.nan)
# df['jatuh_tempo'] = df['jatuh_tempo'].replace('nan', np.nan)
# df['delivery'] = df['delivery'].replace('nan', np.nan)

In [18]:
# Inspect the frame after trimming the date strings.
df

Unnamed: 0,tanggal_request,nama_lokasi,kota,delivery,tipe,jatuh_tempo
1,2010-09-21,pln kota,jkt-hub ii rpc,2010-10-07,522,2013-10-01
2,2010-09-21,perumnas,jkt-hub i rpc,2010-10-07,522,2013-10-01
3,2010-09-21,tip top depok,jkt-hub viii rpc,2010-10-07,522,2013-10-01
4,2010-09-21,tip top pdk bambu,jkt-hub vi rpc,2010-10-07,522,2013-10-01
5,2010-09-21,tip top rawamangun,jkt-hub vii,2010-10-07,529EPP7,2013-10-01
...,...,...,...,...,...,...
1195,2020-04-24,kk tuban,surabaya,2020-11-18,GRGW10,2024-12-01
1196,2020-04-24,kk sampang,surabaya,2020-11-18,GRGW10,2024-12-01
1198,2020-06-11,kk praya mataram,mataram,2020-09-10,GRGW10,2024-10-01
1206,2020-07-16,kk jombang,sidoarjo,2020-11-18,GRGW10,2024-12-01


**`status`**

In [19]:
# Show the rows whose `kota` text contains 'rpc'.
df.loc[df['kota'].str.contains('rpc')]

Unnamed: 0,tanggal_request,nama_lokasi,kota,delivery,tipe,jatuh_tempo
1,2010-09-21,pln kota,jkt-hub ii rpc,2010-10-07,522,2013-10-01
2,2010-09-21,perumnas,jkt-hub i rpc,2010-10-07,522,2013-10-01
3,2010-09-21,tip top depok,jkt-hub viii rpc,2010-10-07,522,2013-10-01
4,2010-09-21,tip top pdk bambu,jkt-hub vi rpc,2010-10-07,522,2013-10-01
6,2010-09-21,lia pramuka,jkt-hub i rpc,2010-10-07,522,2013-10-01
...,...,...,...,...,...,...
280,2013-09-30,rsup sanglah / stikom denpasar,denpasar rpc,2013-11-27,WIN,2017-01-01
288,2013-10-28,spbu adi sucipto,yogyakarta rpc,2013-11-27,WIN,2016-12-01
290,2013-11-06,capem harapan indah,jkt-hub vii rpc,2013-12-02,WIN,2017-01-01
737,2016-07-25,atm indomaret sudirman simp pangeran hidayat (...,pekanbaru rpc,2016-09-05,529EPP7CL,2019-10-01


In [20]:
# Start every row with an empty status; the next cell fills it in.
df['status'] = ''

In [21]:
# Tag each row with the status keyword found in its `kota` text. Keywords are
# applied in this order, so a later match overwrites an earlier one.
# df.loc[mask, 'status'] writes into `df` itself. The previous chained form
# df['status'][mask] = ... assigns through an intermediate Series, which
# pandas does not guarantee to propagate back to the frame.
for keyword in ('rpc', 'cancel', 'off'):
    df.loc[df['kota'].str.contains(keyword), 'status'] = keyword

# Rows that matched no keyword receive the placeholder '[status]'.
df['status'] = df['status'].replace('', '[status]')

In [22]:
# Inspect the frame with the new `status` column.
df

Unnamed: 0,tanggal_request,nama_lokasi,kota,delivery,tipe,jatuh_tempo,status
1,2010-09-21,pln kota,jkt-hub ii rpc,2010-10-07,522,2013-10-01,rpc
2,2010-09-21,perumnas,jkt-hub i rpc,2010-10-07,522,2013-10-01,rpc
3,2010-09-21,tip top depok,jkt-hub viii rpc,2010-10-07,522,2013-10-01,rpc
4,2010-09-21,tip top pdk bambu,jkt-hub vi rpc,2010-10-07,522,2013-10-01,rpc
5,2010-09-21,tip top rawamangun,jkt-hub vii,2010-10-07,529EPP7,2013-10-01,[status]
...,...,...,...,...,...,...,...
1195,2020-04-24,kk tuban,surabaya,2020-11-18,GRGW10,2024-12-01,[status]
1196,2020-04-24,kk sampang,surabaya,2020-11-18,GRGW10,2024-12-01,[status]
1198,2020-06-11,kk praya mataram,mataram,2020-09-10,GRGW10,2024-10-01,[status]
1206,2020-07-16,kk jombang,sidoarjo,2020-11-18,GRGW10,2024-12-01,[status]


In [23]:
# Cut day / month / year text out of the 'YYYY-MM-DD' request date.
# Each entry is: new column -> (start, stop) character positions.
request_cuts = {
    'request_tgl': (-2, None),   # last two characters
    'request_bln': (5, 7),       # characters at positions 5 and 6
    'request_thn': (None, 4),    # first four characters
}
for part_name, (start, stop) in request_cuts.items():
    piece = df['tanggal_request'].str.slice(start, stop)
    # A '-' inside a piece means the text is not in the expected layout;
    # such pieces become NaN so the row is removed below.
    df[part_name] = piece.where(~piece.str.contains('-'))

# Remove every row that now has a missing value in any column.
df.dropna(inplace=True)

for part_name in request_cuts:
    df[part_name] = df[part_name].astype(int)

In [24]:
# Cut day / month / year text out of the 'YYYY-MM-DD' delivery date.
# Each entry is: new column -> (start, stop) character positions.
delivery_cuts = {
    'delivery_tgl': (-2, None),   # last two characters
    'delivery_bln': (5, 7),       # characters at positions 5 and 6
    'delivery_thn': (None, 4),    # first four characters
}
for part_name, (start, stop) in delivery_cuts.items():
    piece = df['delivery'].str.slice(start, stop)
    # A '-' inside a piece means the text is not in the expected layout;
    # such pieces become NaN so the row is removed below.
    df[part_name] = piece.where(~piece.str.contains('-'))

# Remove every row that now has a missing value in any column.
df.dropna(inplace=True)

for part_name in delivery_cuts:
    df[part_name] = df[part_name].astype(int)

In [25]:
# Cut day / month / year text out of the 'YYYY-MM-DD' due date.
# Each entry is: new column -> (start, stop) character positions.
jtempo_cuts = {
    'jtempo_tgl': (-2, None),   # last two characters
    'jtempo_bln': (5, 7),       # characters at positions 5 and 6
    'jtempo_thn': (None, 4),    # first four characters
}
for part_name, (start, stop) in jtempo_cuts.items():
    piece = df['jatuh_tempo'].str.slice(start, stop)
    # A '-' inside a piece means the text is not in the expected layout;
    # such pieces become NaN so the row is removed below.
    df[part_name] = piece.where(~piece.str.contains('-'))

# Remove every row that now has a missing value in any column.
df.dropna(inplace=True)

for part_name in jtempo_cuts:
    df[part_name] = df[part_name].astype(int)

In [26]:
# Inspect the frame with the day / month / year columns added.
df

Unnamed: 0,tanggal_request,nama_lokasi,kota,delivery,tipe,jatuh_tempo,status,request_tgl,request_bln,request_thn,delivery_tgl,delivery_bln,delivery_thn,jtempo_tgl,jtempo_bln,jtempo_thn
1,2010-09-21,pln kota,jkt-hub ii rpc,2010-10-07,522,2013-10-01,rpc,21,9,2010,7,10,2010,1,10,2013
2,2010-09-21,perumnas,jkt-hub i rpc,2010-10-07,522,2013-10-01,rpc,21,9,2010,7,10,2010,1,10,2013
3,2010-09-21,tip top depok,jkt-hub viii rpc,2010-10-07,522,2013-10-01,rpc,21,9,2010,7,10,2010,1,10,2013
4,2010-09-21,tip top pdk bambu,jkt-hub vi rpc,2010-10-07,522,2013-10-01,rpc,21,9,2010,7,10,2010,1,10,2013
5,2010-09-21,tip top rawamangun,jkt-hub vii,2010-10-07,529EPP7,2013-10-01,[status],21,9,2010,7,10,2010,1,10,2013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,2020-04-24,kk tuban,surabaya,2020-11-18,GRGW10,2024-12-01,[status],24,4,2020,18,11,2020,1,12,2024
1196,2020-04-24,kk sampang,surabaya,2020-11-18,GRGW10,2024-12-01,[status],24,4,2020,18,11,2020,1,12,2024
1198,2020-06-11,kk praya mataram,mataram,2020-09-10,GRGW10,2024-10-01,[status],11,6,2020,10,9,2020,1,10,2024
1206,2020-07-16,kk jombang,sidoarjo,2020-11-18,GRGW10,2024-12-01,[status],16,7,2020,18,11,2020,1,12,2024


In [27]:
# Store `tipe` uniformly as text.
# NOTE(review): the column appears to mix numeric-looking codes (e.g. 522)
# with alphanumeric ones (e.g. 529EPP7) -- confirm the raw cell types.
df['tipe'] = df['tipe'].astype(str)

## Mengemas ke dalam dataframe baru

In [28]:
# Empty frame that the next cell fills column by column.
df_final = pd.DataFrame()

In [29]:
# Copy the columns that make up the cleaned output, in their final order.
# The raw date strings are left behind; only their numeric parts are kept.
final_columns = [
    'nama_lokasi', 'kota', 'status', 'tipe',
    'request_tgl', 'request_bln', 'request_thn',
    'delivery_tgl', 'delivery_bln', 'delivery_thn',
    'jtempo_tgl', 'jtempo_bln', 'jtempo_thn',
]
for column_name in final_columns:
    df_final[column_name] = df[column_name]

In [30]:
# Inspect the assembled output frame.
df_final

Unnamed: 0,nama_lokasi,kota,status,tipe,request_tgl,request_bln,request_thn,delivery_tgl,delivery_bln,delivery_thn,jtempo_tgl,jtempo_bln,jtempo_thn
1,pln kota,jkt-hub ii rpc,rpc,522,21,9,2010,7,10,2010,1,10,2013
2,perumnas,jkt-hub i rpc,rpc,522,21,9,2010,7,10,2010,1,10,2013
3,tip top depok,jkt-hub viii rpc,rpc,522,21,9,2010,7,10,2010,1,10,2013
4,tip top pdk bambu,jkt-hub vi rpc,rpc,522,21,9,2010,7,10,2010,1,10,2013
5,tip top rawamangun,jkt-hub vii,[status],529EPP7,21,9,2010,7,10,2010,1,10,2013
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,kk tuban,surabaya,[status],GRGW10,24,4,2020,18,11,2020,1,12,2024
1196,kk sampang,surabaya,[status],GRGW10,24,4,2020,18,11,2020,1,12,2024
1198,kk praya mataram,mataram,[status],GRGW10,11,6,2020,10,9,2020,1,10,2024
1206,kk jombang,sidoarjo,[status],GRGW10,16,7,2020,18,11,2020,1,12,2024


Save ke excel

In [31]:
# Open the existing workbook in append mode so its other sheets are kept.
# mode='a' makes pandas load the workbook itself (the file must already
# exist), replacing the manual `writer.book = book` assignment, which recent
# pandas releases no longer allow.
# if_sheet_exists='replace' overwrites a previous 'sewa_atm' sheet on re-run
# instead of leaving the old one in place (requires pandas >= 1.3).
writer = pd.ExcelWriter(
    'data-cleaned.xlsx',
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
)
# Keep `book` bound to the open workbook, as it was before.
book = writer.book

In [32]:
# Write the cleaned table to the 'sewa_atm' sheet.
df_final.to_excel(writer, sheet_name='sewa_atm')
# close() saves the workbook and releases the file. The separate
# writer.save() call was redundant and is not available in pandas 2.x.
writer.close()

Load kembali

In [33]:
# Read the saved sheet back, using its first column as the row index,
# to check the round trip.
load_data = pd.read_excel(
    'data-cleaned.xlsx',
    sheet_name='sewa_atm',
    index_col=0,
)
load_data

Unnamed: 0,nama_lokasi,kota,status,tipe,request_tgl,request_bln,request_thn,delivery_tgl,delivery_bln,delivery_thn,jtempo_tgl,jtempo_bln,jtempo_thn
1,pln kota,jkt-hub ii rpc,rpc,522,21,9,2010,7,10,2010,1,10,2013
2,perumnas,jkt-hub i rpc,rpc,522,21,9,2010,7,10,2010,1,10,2013
3,tip top depok,jkt-hub viii rpc,rpc,522,21,9,2010,7,10,2010,1,10,2013
4,tip top pdk bambu,jkt-hub vi rpc,rpc,522,21,9,2010,7,10,2010,1,10,2013
5,tip top rawamangun,jkt-hub vii,[status],529EPP7,21,9,2010,7,10,2010,1,10,2013
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,kk tuban,surabaya,[status],GRGW10,24,4,2020,18,11,2020,1,12,2024
1196,kk sampang,surabaya,[status],GRGW10,24,4,2020,18,11,2020,1,12,2024
1198,kk praya mataram,mataram,[status],GRGW10,11,6,2020,10,9,2020,1,10,2024
1206,kk jombang,sidoarjo,[status],GRGW10,16,7,2020,18,11,2020,1,12,2024
