In [1]:
import pandas as pd
import numpy as np
import glob

In [2]:
# Collect every CSV in the working directory. glob.glob returns matches in
# arbitrary order, so sort them: the concatenation order decides which row
# survives the later drop_duplicates call, and it must be reproducible.
all_csv = sorted(glob.glob('*.csv'))

In [3]:
# Read each discovered CSV (semicolon-delimited) into its own DataFrame.
list_df = []
for file in all_csv:
    try:
        df = pd.read_csv(file, delimiter=';')
        list_df.append(df)
    except Exception as e:
        # Best-effort load: report the unreadable file and keep going.
        print(f"Gagal membaca file {file}: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the actual cause instead.
if not list_df:
    raise ValueError("Tidak ada file CSV yang berhasil dibaca; tidak ada data untuk digabung.")

# Stack all frames vertically and renumber the rows from 0.
merged_df = pd.concat(list_df, ignore_index=True)

In [4]:
# Preview the concatenated frame (the rendered output elides the middle rows).
merged_df

Unnamed: 0,Nama Produk,Harga,Toko,Lokasi,Rating,Terjual,Unnamed: 6
0,komputer gaming,Rp3.300.000,Stock Gallery,Jakarta Selatan,,,
1,PC Gaming Komputer ex Warnet,Rp1.999.999,iMba Thrift,Tangerang Selatan,,,
2,Komputer PC All-in-One Baru Layar Lengkung Gam...,Rp4.883.700,ASVS Computer,Jakarta Pusat,5.0,1 terjual,
3,Komputer PC Gaming murah AMD nego,Rp3.900.000,PT Cahaya Digital Indonesia,Kab. Ponorogo,,,
4,PC Gaming I5 10400F komputer,Rp4.000.000,MG`Store,Bekasi,,,
...,...,...,...,...,...,...,...
379,Pc Rakitan i5 10400f | Rx 6600 Diatas rtx 2060...,Rp7.250.000,Rakit PC 212,Tangerang,,,
380,PC Hackintosh Sonoma Ryzen 5 7600X - B650 - 16...,Rp13.999.000,win-group,Jakarta Selatan,,,
381,DC Gaming Series PC | Intel I5 12400F | 16 GB ...,Rp8.340.000,dHafie Computer,Depok,,,
382,RAKITAN PC RYZEN 5 7500F | | RX 6600 8GB | 32G...,Rp14.483.000,GX COMP,Bandung,4.0,1 terjual,


In [5]:
# Summarise column dtypes and non-null counts to spot missing data.
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 384 entries, 0 to 383
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Nama Produk  355 non-null    object 
 1   Harga        355 non-null    object 
 2   Toko         355 non-null    object 
 3   Lokasi       355 non-null    object 
 4   Rating       143 non-null    float64
 5   Terjual      150 non-null    object 
 6   Unnamed: 6   1 non-null      object 
dtypes: float64(1), object(6)
memory usage: 21.1+ KB


In [6]:
# Two rows describe the same listing when product name, shop and price all
# match; such repeats are removed from merged_df in place.
kolom_kunci = ['Nama Produk', 'Toko', 'Harga']
merged_df.drop_duplicates(subset=kolom_kunci, inplace=True)

# Report how many rows are left after de-duplication.
print(f"Jumlah baris setelah menghapus duplikat: {len(merged_df)}")

Jumlah baris setelah menghapus duplikat: 296


In [7]:
# Turn the literal placeholder 'N/A' into a real missing value so the null
# checks further down count it.
merged_df.replace(to_replace='N/A', value=np.nan, inplace=True)

In [8]:
# Convert price strings such as "Rp3.300.000" to floats: strip the "Rp"
# prefix and the "." thousands separators, then cast.
# Both replacements are literal (regex=False) so the result does not depend
# on the pandas version's default for `regex`.
# The dtype guard makes the cell safe to re-run: once the column is numeric
# the `.str` accessor would raise, so the conversion is skipped.
if not pd.api.types.is_numeric_dtype(merged_df['Harga']):
    merged_df['Harga'] = (
        merged_df['Harga']
        .str.replace('Rp', '', regex=False)
        .str.replace('.', '', regex=False)
        .astype(float)
    )

In [9]:
def konversi_terjual(x):
    """Convert a "sold" label such as ``"1 terjual"`` or ``"10 rb+ terjual"`` to a float.

    Parameters
    ----------
    x : str or missing value
        Raw label. The " terjual" suffix and any "+" are stripped; the
        abbreviations "rb" (ribu, x1_000) and "jt" (juta, x1_000_000) are
        expanded. A decimal comma ("1,5 rb") is accepted.

    Returns
    -------
    float
        The numeric count, or ``np.nan`` when ``x`` is missing.

    Raises
    ------
    ValueError
        If the remaining text is not a valid number.
    """
    if pd.isna(x):
        return np.nan

    x = str(x).lower().replace(' terjual', '').replace('+', '').strip()
    # Indonesian notation uses a comma as the decimal mark; float() needs a dot.
    x = x.replace(',', '.')

    # Expand magnitude abbreviations; "rb" is checked first, as before.
    for suffix, multiplier in (('rb', 1000), ('jt', 1000000)):
        if suffix in x:
            return float(x.replace(suffix, '')) * multiplier
    return float(x)

# Parse every raw "Terjual" label into a number, then write the parsed
# values back over the original column.
terjual_numerik = merged_df['Terjual'].apply(konversi_terjual)
merged_df['Terjual'] = terjual_numerik

In [10]:
# 'Unnamed: 6' holds a single non-null value in the info() summary, so it is
# discarded as a stray column (presumably from a malformed row; verify).
# `columns=` already selects the axis, so the redundant `axis=1` is dropped.
# errors='ignore' keeps the cell re-runnable and tolerant of inputs that
# never produced this column, instead of raising KeyError.
merged_df.drop(columns=['Unnamed: 6'], inplace=True, errors='ignore')

In [11]:
# Spot-check the first rows after the price and sold-count conversions.
merged_df.head()

Unnamed: 0,Nama Produk,Harga,Toko,Lokasi,Rating,Terjual
0,komputer gaming,3300000.0,Stock Gallery,Jakarta Selatan,,
1,PC Gaming Komputer ex Warnet,1999999.0,iMba Thrift,Tangerang Selatan,,
2,Komputer PC All-in-One Baru Layar Lengkung Gam...,4883700.0,ASVS Computer,Jakarta Pusat,5.0,1.0
3,Komputer PC Gaming murah AMD nego,3900000.0,PT Cahaya Digital Indonesia,Kab. Ponorogo,,
4,PC Gaming I5 10400F komputer,4000000.0,MG`Store,Bekasi,,


In [12]:
# Count the missing values in each column.
merged_df.isnull().sum()

Nama Produk      1
Harga            1
Toko             1
Lokasi           1
Rating         184
Terjual        177
dtype: int64

In [13]:
# Columns in which a missing value is replaced by 0.
column_for_change = ['Rating', 'Terjual']

# Fill the gaps one column at a time.
for kolom in column_for_change:
    merged_df[kolom] = merged_df[kolom].fillna(0)

In [14]:
# Keep only rows with no missing value in any column; merged_df itself is
# left unchanged and the result is bound to a new name.
cleaned_df = merged_df.dropna(axis=0, how='any')

In [16]:
# Display the cleaned frame produced by the dropna step.
cleaned_df

Unnamed: 0,Nama Produk,Harga,Toko,Lokasi,Rating,Terjual
0,komputer gaming,3300000.0,Stock Gallery,Jakarta Selatan,0.0,0.0
1,PC Gaming Komputer ex Warnet,1999999.0,iMba Thrift,Tangerang Selatan,0.0,0.0
2,Komputer PC All-in-One Baru Layar Lengkung Gam...,4883700.0,ASVS Computer,Jakarta Pusat,5.0,1.0
3,Komputer PC Gaming murah AMD nego,3900000.0,PT Cahaya Digital Indonesia,Kab. Ponorogo,0.0,0.0
4,PC Gaming I5 10400F komputer,4000000.0,MG`Store,Bekasi,0.0,0.0
...,...,...,...,...,...,...
379,Pc Rakitan i5 10400f | Rx 6600 Diatas rtx 2060...,7250000.0,Rakit PC 212,Tangerang,0.0,0.0
380,PC Hackintosh Sonoma Ryzen 5 7600X - B650 - 16...,13999000.0,win-group,Jakarta Selatan,0.0,0.0
381,DC Gaming Series PC | Intel I5 12400F | 16 GB ...,8340000.0,dHafie Computer,Depok,0.0,0.0
382,RAKITAN PC RYZEN 5 7500F | | RX 6600 8GB | 32G...,14483000.0,GX COMP,Bandung,4.0,1.0
