In [2]:
# Data Source: Inside Airbnb
# Snapshot: June 2025
# File: listings.csv (summary, 18 columns)
# Goal: city-level preprocessing + light EDA
# Note: Do NOT drop columns, row-level cleaning only

In [3]:
# Mount Google Drive at /content/drive so that files kept on Drive
# can be read and written from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
# Load the Paris listings CSV from Drive and check its size (rows, columns).
import pandas as pd

path = '/content/drive/MyDrive/paris_listings_clean.csv'
# low_memory=False: parse the file in a single pass instead of in chunks.
df = pd.read_csv(filepath_or_buffer=path, low_memory=False)

# Last expression of the cell, so the (rows, columns) tuple is rendered.
df.shape

(53963, 18)

In [5]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license,review_group
0,3109,zen and calm,3631,Anne,Observatoire,48.83191,2.3187,Entire home/apt,135.0,2,7,2025-06-03,0.08,1,355,3,7511409139079,Has Reviews
1,5396,Your perfect Paris studio on Île Saint-Louis,7903,Borzou,Hôtel-de-Ville,48.85247,2.35835,Entire home/apt,114.0,1,452,2025-06-05,2.32,1,69,48,7510402838018,Has Reviews
2,7397,MARAIS - 2ROOMS APT - 2/4 PEOPLE,2626,Franck,Hôtel-de-Ville,48.85909,2.35315,Entire home/apt,149.0,10,380,2025-06-03,2.2,1,197,25,7510400829623,Has Reviews
3,9359,"Cozy, Central Paris: WALK or VELIB EVERYWHERE !",28422,Bernadette,Louvre,48.86006,2.34863,Entire home/apt,75.0,180,0,,0.0,1,358,0,"Available with a mobility lease only (""bail mo...",No Reviews
4,11265,Elegant appartment in Montmartre,41718,Sylvie,Buttes-Montmartre,48.88494,2.33997,Entire home/apt,150.0,7,35,2025-05-31,0.32,1,59,5,7511801401834,Has Reviews


In [6]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53963 entries, 0 to 53962
Data columns (total 18 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              53963 non-null  int64  
 1   name                            53963 non-null  object 
 2   host_id                         53963 non-null  int64  
 3   host_name                       53953 non-null  object 
 4   neighbourhood                   53963 non-null  object 
 5   latitude                        53963 non-null  float64
 6   longitude                       53963 non-null  float64
 7   room_type                       53963 non-null  object 
 8   price                           53963 non-null  float64
 9   minimum_nights                  53963 non-null  int64  
 10  number_of_reviews               53963 non-null  int64  
 11  last_review                     42306 non-null  object 
 12  reviews_per_month               

In [7]:
# Apply the same preprocessing that is used for the other CSV files.

# EUR -> USD conversion factor (2025-06-05 base exchange rate).
# NOTE(review): 1570 and 1373.80 look like KRW-per-EUR and KRW-per-USD
# quotes combined into a cross rate — confirm against the rate source.
EUR_TO_USD_20250605 = 1570 / 1373.80  # ≈ 1.142

df = (
    df
    # 1) Discard rows that have no price.
    .dropna(subset=["price"])
    .assign(
        # 2) A missing reviews_per_month means the listing has no reviews.
        reviews_per_month=lambda frame: frame["reviews_per_month"].fillna(0),
        # 3) Price converted to USD, rounded to one decimal place.
        price_usd=lambda frame: (frame["price"] * EUR_TO_USD_20250605).round(1),
    )
)

In [8]:
# Show the six columns with the highest share of missing values, followed by
# summary statistics for the original and USD-converted price columns.
#
# A notebook cell only auto-renders its final expression, so the null-ratio
# table was previously computed and then thrown away. Both results are now
# rendered explicitly with display(), which the IPython/Colab kernel provides.
display(df.isna().mean().sort_values(ascending=False).head(6))
display(df[["price", "price_usd"]].describe())

Unnamed: 0,price,price_usd
count,53963.0,53963.0
mean,285.263384,326.004816
std,688.616441,786.960942
min,8.0,9.1
25%,104.0,118.9
50%,161.0,184.0
75%,277.0,316.6
max,30814.0,35214.7


In [9]:
 # Pre-save check (essentials only): top missing-value ratios and price statistics.
null_share_top6 = df.isna().mean().sort_values(ascending=False).head(6)
price_summary = df[["price", "price_usd"]].describe()

print(null_share_top6)
print(price_summary)

last_review    0.216018
license        0.106073
host_name      0.000185
host_id        0.000000
name           0.000000
id             0.000000
dtype: float64
              price     price_usd
count  53963.000000  53963.000000
mean     285.263384    326.004816
std      688.616441    786.960942
min        8.000000      9.100000
25%      104.000000    118.900000
50%      161.000000    184.000000
75%      277.000000    316.600000
max    30814.000000  35214.700000


In [10]:
# Keep price_usd at one decimal place. The column is already rounded to one
# decimal when it is created, so this leaves the values unchanged.
df["price_usd"] = df["price_usd"].round(decimals=1)

In [11]:
# Sanity check: count rows whose price is zero or negative, for both the
# original price column and the USD-converted one.
for price_col in ("price", "price_usd"):
    non_positive_count = (df[price_col] <= 0).sum()
    print(f"{price_col} <= 0:", non_positive_count)

price <= 0: 0
price_usd <= 0: 0


In [12]:
# Preview the first 20 rows, now including the price_usd column.
df.head(n=20)

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license,review_group,price_usd
0,3109,zen and calm,3631,Anne,Observatoire,48.83191,2.3187,Entire home/apt,135.0,2,7,2025-06-03,0.08,1,355,3,7511409139079,Has Reviews,154.3
1,5396,Your perfect Paris studio on Île Saint-Louis,7903,Borzou,Hôtel-de-Ville,48.85247,2.35835,Entire home/apt,114.0,1,452,2025-06-05,2.32,1,69,48,7510402838018,Has Reviews,130.3
2,7397,MARAIS - 2ROOMS APT - 2/4 PEOPLE,2626,Franck,Hôtel-de-Ville,48.85909,2.35315,Entire home/apt,149.0,10,380,2025-06-03,2.2,1,197,25,7510400829623,Has Reviews,170.3
3,9359,"Cozy, Central Paris: WALK or VELIB EVERYWHERE !",28422,Bernadette,Louvre,48.86006,2.34863,Entire home/apt,75.0,180,0,,0.0,1,358,0,"Available with a mobility lease only (""bail mo...",No Reviews,85.7
4,11265,Elegant appartment in Montmartre,41718,Sylvie,Buttes-Montmartre,48.88494,2.33997,Entire home/apt,150.0,7,35,2025-05-31,0.32,1,59,5,7511801401834,Has Reviews,171.4
5,11487,"Heart of Paris, brand new aparment.",42666,Brigitte,Popincourt,48.86441,2.37139,Entire home/apt,63.0,30,16,2025-06-06,0.1,1,241,5,"Available with a mobility lease only (""bail mo...",Has Reviews,72.0
6,11798,Amazing Loft in Paris,44444,Laurence,Gobelins,48.825241,2.367469,Entire home/apt,126.0,3,121,2023-10-21,0.78,1,349,0,Exempt - hotel-type listing,Has Reviews,144.0
7,12452,Voltaire Charm,48733,Irene,Popincourt,48.86159,2.37952,Entire home/apt,360.0,1,63,2025-05-24,0.73,1,8,4,7511102600669,Has Reviews,411.4
8,14903,SPACIEUX PARC DE LA VILLETTE,58645,Fred,Buttes-Chaumont,48.88279,2.38642,Entire home/apt,200.0,7,8,2012-10-24,0.05,1,173,0,7511911870085,Has Reviews,228.6
9,16626,Elegance in St Michel,64627,Phoenice,Luxembourg,48.8522,2.34114,Private room,162.0,3,176,2025-03-30,0.94,3,294,18,,Has Reviews,185.1


In [13]:
# Add a constant city column so that a city filter can be applied later.
CITY_LABEL = "Paris"
df["city"] = CITY_LABEL

In [14]:
# Write the preprocessed frame to Drive as CSV, without the index column.
from pathlib import Path

save_path = "/content/drive/MyDrive/airbnb/clean/paris_listings_jun2025_summary_clean_uj.csv"

# DataFrame.to_csv does not create missing folders and raises OSError when the
# target directory is absent, so make sure the output directory exists first.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)

df.to_csv(save_path, index=False)
print("saved:", save_path)

saved: /content/drive/MyDrive/airbnb/clean/paris_listings_jun2025_summary_clean_uj.csv


In [17]:
# Final check of the frame: print its (rows, columns) tuple, then the
# per-column non-null counts and dtypes.
row_count, column_count = df.shape
print((row_count, column_count))
df.info()

(53963, 20)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53963 entries, 0 to 53962
Data columns (total 20 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              53963 non-null  int64  
 1   name                            53963 non-null  object 
 2   host_id                         53963 non-null  int64  
 3   host_name                       53953 non-null  object 
 4   neighbourhood                   53963 non-null  object 
 5   latitude                        53963 non-null  float64
 6   longitude                       53963 non-null  float64
 7   room_type                       53963 non-null  object 
 8   price                           53963 non-null  float64
 9   minimum_nights                  53963 non-null  int64  
 10  number_of_reviews               53963 non-null  int64  
 11  last_review                     42306 non-null  object 
 12  reviews_per_month   