# **Study Case 01 - Data Mining dan Business Intelligence: EDA pada data property**


In [183]:
import warnings; warnings.simplefilter('ignore')
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import numpy as np
import matplotlib.cm as cm
from collections import Counter
import scipy, itertools
from scipy import stats
from sklearn.preprocessing import StandardScaler, MinMaxScaler
plt.style.use('bmh'); sns.set()
import requests
import io
from pandas_profiling import ProfileReport


# **Load data file csv dari akun GitHub**

In [184]:
# Loading Data:

try: # Running Locally, yakinkan "file_" berada di folder "data"
    dfJ = pd.read_csv("data/jakarta.csv", low_memory = False, encoding='utf8')
    dfD = pd.read_csv("data/depok.csv", low_memory = False, encoding='utf8')
    dfT = pd.read_csv("data/tangerang.csv", low_memory = False, encoding='utf8')
except: # Running in Google Colab
    !mkdir data
    !wget -P data/ https://raw.githubusercontent.com/taufiksutanto/DataMining-CaseStudies/main/data/jakarta.csv
    !wget -P data/ https://raw.githubusercontent.com/taufiksutanto/DataMining-CaseStudies/main/data/depok.csv
    !wget -P data/ https://raw.githubusercontent.com/taufiksutanto/DataMining-CaseStudies/main/data/tangerang.csv
    dfJ = pd.read_csv("data/jakarta.csv", low_memory = False, encoding='utf8')
    dfD = pd.read_csv("data/depok.csv", low_memory = False, encoding='utf8')
    dfT = pd.read_csv("data/tangerang.csv", low_memory = False, encoding='utf8')
    
dfJ.shape, dfD.shape, dfT.shape

((1024, 26), (998, 26), (128, 26))

# **Treatment Data Jakarta**

# Data Understanding

In [185]:
N, P = dfJ.shape # Ukuran Data
print('baris = ', N, ', Kolom (jumlah variabel) = ', P)
print("Tipe Variabe df = ", type(dfJ))
# "Melihat" beberapa data pertamanya
dfJ.head()

baris =  1024 , Kolom (jumlah variabel) =  26
Tipe Variabe df =  <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,created_at,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,06-12-2021,202.0,198.0,8.0,3.0,,,Jakarta Pusat,SHM,2200.0,...,,,,,,,,,,
1,02-12-2021,30.0,55.0,2.0,1.0,,1.0,Jakarta Pusat,SHM,2200.0,...,,,,,,,,,,
2,06-12-2021,19.0,35.0,2.0,1.0,,,Jakarta Pusat,SHM,2200.0,...,,,,,,,,,,
3,06-12-2021,33.0,42.0,2.0,2.0,,1.0,Jakarta Pusat,SHM,2200.0,...,,,,,,,,,,
4,06-12-2021,30.0,55.0,2.0,2.0,,1.0,Jakarta Pusat,SHM,2200.0,...,,,,,,,,,,


In [186]:
dfJ.tail()

Unnamed: 0,created_at,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
1019,,,,,,,,,,,...,,,,,,,,,,
1020,,,,,,,,,,,...,,,,,,,,,,
1021,,,,,,,,,,,...,,,,,,,,,,
1022,,,,,,,,,,,...,,,,,,,,,,
1023,,,,,,,,,,,...,,,,,,,,,,


In [187]:
dfJ.sample(15)

Unnamed: 0,created_at,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
377,,,,,,,,,,,...,,,,,,,,,,
288,,,,,,,,,,,...,,,,,,,,,,
865,,,,,,,,,,,...,,,,,,,,,,
895,,,,,,,,,,,...,,,,,,,,,,
95,07-12-2021,640.0,1200.0,6.0,5.0,,2.0,Jakarta Utara,SHM,9500.0,...,,,,,,,,,,
1008,,,,,,,,,,,...,,,,,,,,,,
74,06-12-2021,170.0,325.0,5.0,4.0,2.0,2.0,Jakarta Utara,SHM,3500.0,...,,,,,,,,,,
750,,,,,,,,,,,...,,,,,,,,,,
742,,,,,,,,,,,...,,,,,,,,,,
188,,,,,,,,,,,...,,,,,,,,,,


In [188]:
# drop kolom yang tidak terpakai
dfJ.drop(["created_at","Unnamed: 14","Unnamed: 15","Unnamed: 16","Unnamed: 17","Unnamed: 18",
          "Unnamed: 19","Unnamed: 20","Unnamed: 21","Unnamed: 22","Unnamed: 23",
          "Unnamed: 24","Unnamed: 25"], axis=1, inplace=True)
dfJ.head()

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,202.0,198.0,8.0,3.0,,,Jakarta Pusat,SHM,2200.0,,4500000000.0,https://www.rumah.com/listing-properti/dijual-...,Dekat Cikini Salemba Taman Isamail Marzuki Kam...
1,30.0,55.0,2.0,1.0,,1.0,Jakarta Pusat,SHM,2200.0,,1100000000.0,https://www.rumah.com/listing-properti/dijual-...,"Mengusung konsep minimalis, dan didukung denga..."
2,19.0,35.0,2.0,1.0,,,Jakarta Pusat,SHM,2200.0,,395000000.0,https://www.rumah.com/listing-properti/dijual-...,"Dekat dengan Mall Atrium Senen, dekat dengan a..."
3,33.0,42.0,2.0,2.0,,1.0,Jakarta Pusat,SHM,2200.0,,835000000.0,https://www.rumah.com/listing-properti/dijual-...,"Berlokasi sangat strategis, sangat dekat ke Tu..."
4,30.0,55.0,2.0,2.0,,1.0,Jakarta Pusat,SHM,2200.0,,1100000000.0,https://www.rumah.com/listing-properti/dijual-...,"Bisa request ubah tata ruang, beli 2 unit mend..."


In [189]:
dfJ.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1024 entries, 0 to 1023
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   LT          121 non-null    float64
 1   LB          121 non-null    float64
 2   KT          121 non-null    float64
 3   KM          121 non-null    float64
 4   garasi      44 non-null     float64
 5   carport     90 non-null     float64
 6   lokasi      121 non-null    object 
 7   sertifikat  119 non-null    object 
 8   listrik     120 non-null    float64
 9   hadap       36 non-null     object 
 10  harga       121 non-null    float64
 11  URL         121 non-null    object 
 12  deskripsi   112 non-null    object 
dtypes: float64(8), object(5)
memory usage: 104.1+ KB


In [190]:
dfJ.drop(dfJ.tail(903).index,inplace=True)

In [191]:
dfJ

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,202.0,198.0,8.0,3.0,,,Jakarta Pusat,SHM,2200.0,,4.500000e+09,https://www.rumah.com/listing-properti/dijual-...,Dekat Cikini Salemba Taman Isamail Marzuki Kam...
1,30.0,55.0,2.0,1.0,,1.0,Jakarta Pusat,SHM,2200.0,,1.100000e+09,https://www.rumah.com/listing-properti/dijual-...,"Mengusung konsep minimalis, dan didukung denga..."
2,19.0,35.0,2.0,1.0,,,Jakarta Pusat,SHM,2200.0,,3.950000e+08,https://www.rumah.com/listing-properti/dijual-...,"Dekat dengan Mall Atrium Senen, dekat dengan a..."
3,33.0,42.0,2.0,2.0,,1.0,Jakarta Pusat,SHM,2200.0,,8.350000e+08,https://www.rumah.com/listing-properti/dijual-...,"Berlokasi sangat strategis, sangat dekat ke Tu..."
4,30.0,55.0,2.0,2.0,,1.0,Jakarta Pusat,SHM,2200.0,,1.100000e+09,https://www.rumah.com/listing-properti/dijual-...,"Bisa request ubah tata ruang, beli 2 unit mend..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,220.0,239.0,5.0,4.0,1.0,2.0,Jakarta Timur,PPJB,3500.0,,3.850000e+09,https://www.rumah.com/listing-properti/dijual-...,Rumah Mewah siap huni 2 lantai 220m 10x22 type...
117,70.0,57.0,3.0,2.0,,2.0,Jakarta Timur,SHM,2200.0,,8.800000e+08,https://www.rumah.com/listing-properti/dijual-...,Rumah Signature Location & Access :\n• 2 Menit...
118,315.0,280.0,4.0,3.0,1.0,2.0,Jakarta Timur,SHM + IMB,3500.0,Timur,4.300000e+09,https://www.rumah.com/listing-properti/dijual-...,"Rumah 1, 5 Lantai Luas Strategis di Pondok Bam..."
119,135.0,190.0,5.0,5.0,,2.0,Jakarta Timur,SHM + IMB,2200.0,Selatan,3.400000e+09,https://www.rumah.com/listing-properti/dijual-...,Rumah Baru Non Komplek Posisi Hook Lokasi Stra...


In [192]:
dfJ.duplicated().sum()

0

In [193]:
dfJ.isnull().sum()

LT             0
LB             0
KT             0
KM             0
garasi        77
carport       31
lokasi         0
sertifikat     2
listrik        1
hadap         85
harga          0
URL            0
deskripsi      9
dtype: int64

In [194]:
(dfJ.isnull().sum()/len(dfJ)).to_frame('persentase missing')

Unnamed: 0,persentase missing
LT,0.0
LB,0.0
KT,0.0
KM,0.0
garasi,0.636364
carport,0.256198
lokasi,0.0
sertifikat,0.016529
listrik,0.008264
hadap,0.702479


In [195]:
# Mengatasi missing value dengan drop karena variabel random (missing value sedikit)
dfJ.dropna(subset=["LT", "LB", "KT", "KM", "lokasi", "sertifikat", "URL", "listrik", "harga", "deskripsi"])

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,202.0,198.0,8.0,3.0,,,Jakarta Pusat,SHM,2200.0,,4.500000e+09,https://www.rumah.com/listing-properti/dijual-...,Dekat Cikini Salemba Taman Isamail Marzuki Kam...
1,30.0,55.0,2.0,1.0,,1.0,Jakarta Pusat,SHM,2200.0,,1.100000e+09,https://www.rumah.com/listing-properti/dijual-...,"Mengusung konsep minimalis, dan didukung denga..."
2,19.0,35.0,2.0,1.0,,,Jakarta Pusat,SHM,2200.0,,3.950000e+08,https://www.rumah.com/listing-properti/dijual-...,"Dekat dengan Mall Atrium Senen, dekat dengan a..."
3,33.0,42.0,2.0,2.0,,1.0,Jakarta Pusat,SHM,2200.0,,8.350000e+08,https://www.rumah.com/listing-properti/dijual-...,"Berlokasi sangat strategis, sangat dekat ke Tu..."
4,30.0,55.0,2.0,2.0,,1.0,Jakarta Pusat,SHM,2200.0,,1.100000e+09,https://www.rumah.com/listing-properti/dijual-...,"Bisa request ubah tata ruang, beli 2 unit mend..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,220.0,239.0,5.0,4.0,1.0,2.0,Jakarta Timur,PPJB,3500.0,,3.850000e+09,https://www.rumah.com/listing-properti/dijual-...,Rumah Mewah siap huni 2 lantai 220m 10x22 type...
117,70.0,57.0,3.0,2.0,,2.0,Jakarta Timur,SHM,2200.0,,8.800000e+08,https://www.rumah.com/listing-properti/dijual-...,Rumah Signature Location & Access :\n• 2 Menit...
118,315.0,280.0,4.0,3.0,1.0,2.0,Jakarta Timur,SHM + IMB,3500.0,Timur,4.300000e+09,https://www.rumah.com/listing-properti/dijual-...,"Rumah 1, 5 Lantai Luas Strategis di Pondok Bam..."
119,135.0,190.0,5.0,5.0,,2.0,Jakarta Timur,SHM + IMB,2200.0,Selatan,3.400000e+09,https://www.rumah.com/listing-properti/dijual-...,Rumah Baru Non Komplek Posisi Hook Lokasi Stra...


In [196]:
# mengatasi missing value dengan mean karena tipe variabel data numerik
rata_garasi = dfJ['garasi'].mean()
dfJ['garasi'] = dfJ['garasi'].fillna(rata_garasi)
dfJ['garasi'].isna().sum()

0

In [197]:
# mengatasi missing value dengan mean karena tipe variabel data numerik
rata_carport = dfJ['carport'].mean()
dfJ['carport'] = dfJ['carport'].fillna(rata_carport)
dfJ['carport'].isna().sum()

0

In [198]:
# mengatasi missing value dengan mean karena tipe variabel data kategorik
modus_hadap = dfJ['hadap'].mode()[0]
dfJ['hadap'] = dfJ['hadap'].fillna(modus_hadap)
dfJ['hadap'].isna().sum()

0

In [199]:
dfJ

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,202.0,198.0,8.0,3.0,1.863636,1.955556,Jakarta Pusat,SHM,2200.0,Timur,4.500000e+09,https://www.rumah.com/listing-properti/dijual-...,Dekat Cikini Salemba Taman Isamail Marzuki Kam...
1,30.0,55.0,2.0,1.0,1.863636,1.000000,Jakarta Pusat,SHM,2200.0,Timur,1.100000e+09,https://www.rumah.com/listing-properti/dijual-...,"Mengusung konsep minimalis, dan didukung denga..."
2,19.0,35.0,2.0,1.0,1.863636,1.955556,Jakarta Pusat,SHM,2200.0,Timur,3.950000e+08,https://www.rumah.com/listing-properti/dijual-...,"Dekat dengan Mall Atrium Senen, dekat dengan a..."
3,33.0,42.0,2.0,2.0,1.863636,1.000000,Jakarta Pusat,SHM,2200.0,Timur,8.350000e+08,https://www.rumah.com/listing-properti/dijual-...,"Berlokasi sangat strategis, sangat dekat ke Tu..."
4,30.0,55.0,2.0,2.0,1.863636,1.000000,Jakarta Pusat,SHM,2200.0,Timur,1.100000e+09,https://www.rumah.com/listing-properti/dijual-...,"Bisa request ubah tata ruang, beli 2 unit mend..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,220.0,239.0,5.0,4.0,1.000000,2.000000,Jakarta Timur,PPJB,3500.0,Timur,3.850000e+09,https://www.rumah.com/listing-properti/dijual-...,Rumah Mewah siap huni 2 lantai 220m 10x22 type...
117,70.0,57.0,3.0,2.0,1.863636,2.000000,Jakarta Timur,SHM,2200.0,Timur,8.800000e+08,https://www.rumah.com/listing-properti/dijual-...,Rumah Signature Location & Access :\n• 2 Menit...
118,315.0,280.0,4.0,3.0,1.000000,2.000000,Jakarta Timur,SHM + IMB,3500.0,Timur,4.300000e+09,https://www.rumah.com/listing-properti/dijual-...,"Rumah 1, 5 Lantai Luas Strategis di Pondok Bam..."
119,135.0,190.0,5.0,5.0,1.863636,2.000000,Jakarta Timur,SHM + IMB,2200.0,Selatan,3.400000e+09,https://www.rumah.com/listing-properti/dijual-...,Rumah Baru Non Komplek Posisi Hook Lokasi Stra...


In [200]:
dfJ['KT'] = dfJ['KT'] .astype('int64')
dfJ['KM'] = dfJ['KM'] .astype('int64')
dfJ['garasi'] = dfJ['garasi'] .astype('int64')
dfJ['carport'] = dfJ['carport'] .astype('int64')
dfJ['lokasi'] = dfJ['lokasi'] .astype('category')
dfJ['sertifikat'] = dfJ['sertifikat'] .astype('category')
dfJ['listrik'] = dfJ['listrik'] .astype('category')
dfJ['hadap'] = dfJ['hadap'] .astype('category')
dfJ['harga'] = dfJ['harga'] .astype('int64')

dfJ.dtypes

LT             float64
LB             float64
KT               int64
KM               int64
garasi           int64
carport          int64
lokasi        category
sertifikat    category
listrik       category
hadap         category
harga            int64
URL             object
deskripsi       object
dtype: object

In [201]:
dfJ

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,202.0,198.0,8,3,1,1,Jakarta Pusat,SHM,2200.0,Timur,4500000000,https://www.rumah.com/listing-properti/dijual-...,Dekat Cikini Salemba Taman Isamail Marzuki Kam...
1,30.0,55.0,2,1,1,1,Jakarta Pusat,SHM,2200.0,Timur,1100000000,https://www.rumah.com/listing-properti/dijual-...,"Mengusung konsep minimalis, dan didukung denga..."
2,19.0,35.0,2,1,1,1,Jakarta Pusat,SHM,2200.0,Timur,395000000,https://www.rumah.com/listing-properti/dijual-...,"Dekat dengan Mall Atrium Senen, dekat dengan a..."
3,33.0,42.0,2,2,1,1,Jakarta Pusat,SHM,2200.0,Timur,835000000,https://www.rumah.com/listing-properti/dijual-...,"Berlokasi sangat strategis, sangat dekat ke Tu..."
4,30.0,55.0,2,2,1,1,Jakarta Pusat,SHM,2200.0,Timur,1100000000,https://www.rumah.com/listing-properti/dijual-...,"Bisa request ubah tata ruang, beli 2 unit mend..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,220.0,239.0,5,4,1,2,Jakarta Timur,PPJB,3500.0,Timur,3850000000,https://www.rumah.com/listing-properti/dijual-...,Rumah Mewah siap huni 2 lantai 220m 10x22 type...
117,70.0,57.0,3,2,1,2,Jakarta Timur,SHM,2200.0,Timur,880000000,https://www.rumah.com/listing-properti/dijual-...,Rumah Signature Location & Access :\n• 2 Menit...
118,315.0,280.0,4,3,1,2,Jakarta Timur,SHM + IMB,3500.0,Timur,4300000000,https://www.rumah.com/listing-properti/dijual-...,"Rumah 1, 5 Lantai Luas Strategis di Pondok Bam..."
119,135.0,190.0,5,5,1,2,Jakarta Timur,SHM + IMB,2200.0,Selatan,3400000000,https://www.rumah.com/listing-properti/dijual-...,Rumah Baru Non Komplek Posisi Hook Lokasi Stra...


# VISUALISASI


In [203]:
# Mengetahui hubungan antara variabel
dfJ.corr()

Unnamed: 0,LT,LB,KT,KM,garasi,carport,harga
LT,1.0,0.414775,0.178123,0.104909,0.259818,0.292076,0.390733
LB,0.414775,1.0,0.572796,0.581507,0.374305,0.562148,0.804563
KT,0.178123,0.572796,1.0,0.806234,0.169255,0.352171,0.375735
KM,0.104909,0.581507,0.806234,1.0,0.105066,0.387357,0.394783
garasi,0.259818,0.374305,0.169255,0.105066,1.0,0.0124,0.345484
carport,0.292076,0.562148,0.352171,0.387357,0.0124,1.0,0.578811
harga,0.390733,0.804563,0.375735,0.394783,0.345484,0.578811,1.0


In [230]:
p = sns.catplot(x="harga", data=dfJ)
p 

<seaborn.axisgrid.FacetGrid at 0x7f4c6d35d0d0>

In [232]:
p1 = sns.catplot(x="LB", y="harga", data=dfJ)
p1

<seaborn.axisgrid.FacetGrid at 0x7f4c6d35dfd0>

 Kualitas data yang dimiliki perusahaan kurang baik, dikarenakan terdapat banyak informasi atau spesifikasi yang belum jelas atau kosong, sehingga perlu dilakukan drop/dropna atau penghapusan beberapa kolom dan baris pada data. Lalu ada baiknya DE/DBA ketika melakukan pengumpulan data untuk mencari informasi yang mendetail sehingga tidak ada data yang kosong agar RPPI menjadi data diven company yang unggul. Kemudian dari visualisasi di atas, sepertinya ada kecenderungan bahwa harga rumah di kota Depok lebih murah dibandingkan harga rumah di kota Jakarta, ada baiknya RPPI membuat kantor cabang berikutnya di Depok Di mana kota ini tersedia banyak harga rumah yang terjangkau. Lalu dari data yang ada, rumah dengan karakteristik adalah rumah yang paling banyak dijual.
Jika RPPI ingin melakukan investasi max Rp.25 Milyar minggu besok, dari data, kita dapat memberikan rekomendasi rumah mana saja yang akan berpotensi menghasilkan keuntungan bagi perusahaan dikarenakan lokasinya yang strategis, yaitu terletak pada kawasan yang ramai dan mudah diakses. Serta rumah-rumah tersebut cukup luas dan memiliki fasilitas yang memadai sehingga berpotensi untuk dijadikan sebagai bisnis sewa yang akan menghasilkan keuntungan bagi perusahaan RPPI.

# Melihat informasi dari data csv Depok

In [228]:
N, P = dfD.shape # Ukuran Data
print('baris = ', N, ', Kolom (jumlah variabel) = ', P)
print("Tipe Variabe df = ", type(dfD))
# "Melihat" beberapa data pertamanya
dfD.head()

baris =  118 , Kolom (jumlah variabel) =  13
Tipe Variabe df =  <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,165,200.0,4,2,1,1,Sukmajaya,SHM,2200.0,Selatan,2100,https://www.rumah123.com/properti/depok/hos928...,Rumah siap huni komplek Pelni
1,50,45.0,2,1,1,1,Cipayung,SHM,1300.0,Utara,250,https://www.rumah123.com/properti/depok/hos928...,rumah minimalis termurah di citayam
2,50,36.0,2,1,1,1,Sawangan,SHM,1300.0,Barat,250,https://www.rumah123.com/properti/depok/hos928...,CLUSTER TERMURAH DI SAWANGAN KOTA DEPOK
3,50,45.0,2,1,1,1,Cimanggis,SHM,1300.0,Utara,250,https://www.rumah123.com/properti/depok/hos928...,RUMAH SIAP HUNI HARGA NEGO DEKAT STASIUN CITAYAM
4,72,45.0,2,1,1,1,Sawangan,SHM,1300.0,Timur,350,https://www.rumah123.com/properti/depok/hos928...,Rumah Cluster Siap Huni Depok


In [229]:
dfD.tail()

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
113,79,115.0,2,1,1,1,Sukmajaya,SHM,2200.0,Utara,750,https://www.rumah123.com/properti/depok/hos928...,Rumah di Depok Timur
114,72,45.0,2,1,1,1,Cimanggis,SHM,2200.0,Utara,1000,https://www.rumah123.com/properti/depok/hos928...,PERUMAHAN NUANSA PERMAI DEPOK
115,40,40.0,2,1,1,1,Cipayung,SHM,1300.0,Timur,195,https://www.rumah123.com/properti/depok/hos928...,rumah semi minimalis manis harga ekonomis di c...
116,50,50.0,2,1,1,1,Cipayung,SHM,1300.0,Selatan,185,https://www.rumah123.com/properti/depok/hos927...,rumah semi minimalis dua kamar siap huni di ci...
117,100,15.0,3,3,1,1,Cinere,SHM,3500.0,Utara,1750,https://www.rumah123.com/properti/depok/hos928...,Dijual cepat rumah bagus siap huni


In [211]:
dfD.sample(15)

Unnamed: 0,created_at,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
55,5-12-2021,92.0,110.0,3.0,1.0,1.0,1.0,Cinere,SHM,2200.0,...,,,,,,,,,,
233,,,,,,,,,,,...,,,,,,,,,,
595,,,,,,,,,,,...,,,,,,,,,,
226,,,,,,,,,,,...,,,,,,,,,,
662,,,,,,,,,,,...,,,,,,,,,,
145,,,,,,,,,,,...,,,,,,,,,,
718,,,,,,,,,,,...,,,,,,,,,,
547,,,,,,,,,,,...,,,,,,,,,,
393,,,,,,,,,,,...,,,,,,,,,,
341,,,,,,,,,,,...,,,,,,,,,,


In [212]:
# drop kolom yang tidak terpakai
dfD.drop(["created_at","Unnamed: 14","Unnamed: 15","Unnamed: 16","Unnamed: 17","Unnamed: 18",
          "Unnamed: 19","Unnamed: 20","Unnamed: 21","Unnamed: 22","Unnamed: 23",
          "Unnamed: 24","Unnamed: 25"], axis=1, inplace=True)
dfD.head()

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,165,200.0,4.0,2.0,,1.0,Sukmajaya,SHM,2200.0,Selatan,2100.0,https://www.rumah123.com/properti/depok/hos928...,Rumah siap huni komplek Pelni
1,50,45.0,2.0,1.0,,,Cipayung,SHM,1300.0,,250.0,https://www.rumah123.com/properti/depok/hos928...,rumah minimalis termurah di citayam
2,50,36.0,2.0,1.0,,1.0,Sawangan,SHM,1300.0,Barat,250.0,https://www.rumah123.com/properti/depok/hos928...,CLUSTER TERMURAH DI SAWANGAN KOTA DEPOK
3,50,45.0,2.0,1.0,,,Cimanggis,SHM,1300.0,,250.0,https://www.rumah123.com/properti/depok/hos928...,RUMAH SIAP HUNI HARGA NEGO DEKAT STASIUN CITAYAM
4,72,45.0,2.0,1.0,,1.0,Sawangan,SHM,1300.0,Timur,350.0,https://www.rumah123.com/properti/depok/hos928...,Rumah Cluster Siap Huni Depok


In [213]:
dfD.drop(dfD.tail(880).index,inplace=True)

In [214]:
dfD.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 118 entries, 0 to 117
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   LT          118 non-null    object 
 1   LB          118 non-null    float64
 2   KT          118 non-null    float64
 3   KM          118 non-null    float64
 4   garasi      25 non-null     float64
 5   carport     94 non-null     float64
 6   lokasi      118 non-null    object 
 7   sertifikat  118 non-null    object 
 8   listrik     103 non-null    float64
 9   hadap       62 non-null     object 
 10  harga       118 non-null    float64
 11  URL         118 non-null    object 
 12  deskripsi   118 non-null    object 
dtypes: float64(7), object(6)
memory usage: 12.9+ KB


In [215]:
dfD

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,165,200.0,4.0,2.0,,1.0,Sukmajaya,SHM,2200.0,Selatan,2100.0,https://www.rumah123.com/properti/depok/hos928...,Rumah siap huni komplek Pelni
1,50,45.0,2.0,1.0,,,Cipayung,SHM,1300.0,,250.0,https://www.rumah123.com/properti/depok/hos928...,rumah minimalis termurah di citayam
2,50,36.0,2.0,1.0,,1.0,Sawangan,SHM,1300.0,Barat,250.0,https://www.rumah123.com/properti/depok/hos928...,CLUSTER TERMURAH DI SAWANGAN KOTA DEPOK
3,50,45.0,2.0,1.0,,,Cimanggis,SHM,1300.0,,250.0,https://www.rumah123.com/properti/depok/hos928...,RUMAH SIAP HUNI HARGA NEGO DEKAT STASIUN CITAYAM
4,72,45.0,2.0,1.0,,1.0,Sawangan,SHM,1300.0,Timur,350.0,https://www.rumah123.com/properti/depok/hos928...,Rumah Cluster Siap Huni Depok
...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,79,115.0,2.0,1.0,1.0,1.0,Sukmajaya,SHM,,,750.0,https://www.rumah123.com/properti/depok/hos928...,Rumah di Depok Timur
114,72,45.0,2.0,1.0,,1.0,Cimanggis,SHM,,,1000.0,https://www.rumah123.com/properti/depok/hos928...,PERUMAHAN NUANSA PERMAI DEPOK
115,40,40.0,2.0,1.0,,,Cipayung,SHM,1300.0,Timur,195.0,https://www.rumah123.com/properti/depok/hos928...,rumah semi minimalis manis harga ekonomis di c...
116,50,50.0,2.0,1.0,,,Cipayung,SHM,1300.0,Selatan,185.0,https://www.rumah123.com/properti/depok/hos927...,rumah semi minimalis dua kamar siap huni di ci...


In [216]:
dfD.duplicated().sum()

0

In [217]:
dfD.isnull().sum()

LT             0
LB             0
KT             0
KM             0
garasi        93
carport       24
lokasi         0
sertifikat     0
listrik       15
hadap         56
harga          0
URL            0
deskripsi      0
dtype: int64

In [218]:
(dfD.isnull().sum()/len(dfD)).to_frame('persentase missing')

Unnamed: 0,persentase missing
LT,0.0
LB,0.0
KT,0.0
KM,0.0
garasi,0.788136
carport,0.20339
lokasi,0.0
sertifikat,0.0
listrik,0.127119
hadap,0.474576


In [219]:
# mengatasi missing value dengan mean karena tipe variabel data kategorik
modus_listrik = dfD['listrik'].mode()[0]
dfD['listrik'] = dfD['listrik'].fillna(modus_listrik)
dfD['listrik'].isna().sum()

0

In [220]:
# mengatasi missing value dengan mean karena tipe variabel data numerik
rata_garasi = dfD['garasi'].mean()
dfD['garasi'] = dfD['garasi'].fillna(rata_garasi)
dfD['garasi'].isna().sum()

0

In [221]:
# mengatasi missing value dengan mean karena tipe variabel data numerik
rata_carport = dfD['carport'].mean()
dfD['carport'] = dfD['carport'].fillna(rata_carport)
dfD['carport'].isna().sum()

0

In [222]:
# mengatasi missing value dengan mean karena tipe variabel data kategorik
modus_hadap = dfD['hadap'].mode()[0]
dfD['hadap'] = dfD['hadap'].fillna(modus_hadap)
dfD['hadap'].isna().sum()

0

In [223]:
dfD

Unnamed: 0,LT,LB,KT,KM,garasi,carport,lokasi,sertifikat,listrik,hadap,harga,URL,deskripsi
0,165,200.0,4.0,2.0,1.76,1.000000,Sukmajaya,SHM,2200.0,Selatan,2100.0,https://www.rumah123.com/properti/depok/hos928...,Rumah siap huni komplek Pelni
1,50,45.0,2.0,1.0,1.76,1.670213,Cipayung,SHM,1300.0,Utara,250.0,https://www.rumah123.com/properti/depok/hos928...,rumah minimalis termurah di citayam
2,50,36.0,2.0,1.0,1.76,1.000000,Sawangan,SHM,1300.0,Barat,250.0,https://www.rumah123.com/properti/depok/hos928...,CLUSTER TERMURAH DI SAWANGAN KOTA DEPOK
3,50,45.0,2.0,1.0,1.76,1.670213,Cimanggis,SHM,1300.0,Utara,250.0,https://www.rumah123.com/properti/depok/hos928...,RUMAH SIAP HUNI HARGA NEGO DEKAT STASIUN CITAYAM
4,72,45.0,2.0,1.0,1.76,1.000000,Sawangan,SHM,1300.0,Timur,350.0,https://www.rumah123.com/properti/depok/hos928...,Rumah Cluster Siap Huni Depok
...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,79,115.0,2.0,1.0,1.00,1.000000,Sukmajaya,SHM,2200.0,Utara,750.0,https://www.rumah123.com/properti/depok/hos928...,Rumah di Depok Timur
114,72,45.0,2.0,1.0,1.76,1.000000,Cimanggis,SHM,2200.0,Utara,1000.0,https://www.rumah123.com/properti/depok/hos928...,PERUMAHAN NUANSA PERMAI DEPOK
115,40,40.0,2.0,1.0,1.76,1.670213,Cipayung,SHM,1300.0,Timur,195.0,https://www.rumah123.com/properti/depok/hos928...,rumah semi minimalis manis harga ekonomis di c...
116,50,50.0,2.0,1.0,1.76,1.670213,Cipayung,SHM,1300.0,Selatan,185.0,https://www.rumah123.com/properti/depok/hos927...,rumah semi minimalis dua kamar siap huni di ci...


In [224]:
dfD['KT'] = dfD['KT'] .astype('int64')
dfD['KM'] = dfD['KM'] .astype('int64')
dfD['garasi'] = dfD['garasi'] .astype('int64')
dfD['carport'] = dfD['carport'] .astype('int64')
dfD['lokasi'] = dfD['lokasi'] .astype('category')
dfD['sertifikat'] = dfD['sertifikat'] .astype('category')
dfD['listrik'] = dfD['listrik'] .astype('category')
dfD['hadap'] = dfD['hadap'] .astype('category')
dfD['harga'] = dfD['harga'] .astype('int64')

dfD.dtypes

LT              object
LB             float64
KT               int64
KM               int64
garasi           int64
carport          int64
lokasi        category
sertifikat    category
listrik       category
hadap         category
harga            int64
URL             object
deskripsi       object
dtype: object

#  VISUALISASI

In [None]:
q = sns.catplot(x="harga", data=dfD)
q 

# Melihat informasi dari data csv Tangerang

In [None]:
N, P = dfT.shape # Ukuran Data
print('baris = ', N, ', Kolom (jumlah variabel) = ', P)
print("Tipe Variabe df = ", type(dfT))
# "Melihat" beberapa data pertamanya
dfT.head(9)

In [None]:
dfT.tail()

In [None]:
dfT.sample(15)

In [None]:
# drop kolom yang tidak terpakai
dfT.drop(["created_at","Unnamed: 14","Unnamed: 15","Unnamed: 16","Unnamed: 17","Unnamed: 18",
          "Unnamed: 19","Unnamed: 20","Unnamed: 21","Unnamed: 22","Unnamed: 23",
          "Unnamed: 24","Unnamed: 25"], axis=1, inplace=True)
dfT.head()

In [None]:
dfT.drop(dfT.tail(4).index,inplace=True)

In [None]:
dfT

In [None]:
dfT.duplicated().sum()

In [None]:
#drop duplicated data
dfT.drop_duplicates(inplace=True) 
print(dfT.duplicated().sum()) 
print(dfT.shape) 

In [None]:
print(dfT.isnull().sum())

In [None]:
(dfJ.isnull().sum()/len(dfJ)).to_frame('persentase missing')

In [None]:
# Mengatasi missing value dengan drop karena variabel random (missing value sedikit)
dfJ.dropna(subset=["LT", "LB", "KT", "KM", "lokasi", "sertifikat", "URL", "listrik", "harga", "deskripsi"])

In [None]:
# mengatasi missing value dengan mean karena tipe variabel data numerik
rata_garasi = dfJ['garasi'].mean()
dfJ['garasi'] = dfJ['garasi'].fillna(rata_garasi)
dfJ['garasi'].isna().sum()

In [None]:
# mengatasi missing value dengan mean karena tipe variabel data numerik
rata_carport = dfJ['carport'].mean()
dfJ['carport'] = dfJ['carport'].fillna(rata_carport)
dfJ['carport'].isna().sum()

In [None]:
# mengatasi missing value dengan mean karena tipe variabel data kategorik
modus_hadap = dfJ['hadap'].mode()[0]
dfJ['hadap'] = dfJ['hadap'].fillna(modus_hadap)
dfJ['hadap'].isna().sum()

In [None]:
dfT

In [None]:
dfT['KT'] = dfT['KT'] .astype('int64')
dfT['KM'] = dfT['KM'] .astype('int64')
dfT['garasi'] = dfT['garasi'] .astype('int64')
dfT['carport'] = dfT['carport'] .astype('int64')
dfT['lokasi'] = dfT['lokasi'] .astype('category')
dfT['sertifikat'] = dfT['sertifikat'] .astype('category')
dfT['listrik'] = dfT['listrik'] .astype('category')
dfT['hadap'] = dfT['hadap'] .astype('category')
dfT['harga'] = dfT['harga'] .astype('int64')

dfT.dtypes

# VISUALISASI

In [None]:
r = sns.catplot(x="harga", data=dfT)
r 