In [1]:
import pandas as pd
import json
import numpy as np
import seaborn as sn
import pickle

from matplotlib import pyplot as plt
from io import StringIO
%matplotlib inline

In [None]:
import os
import ssl
import requests

from tqdm import tqdm
from PIL import Image
from io import BytesIO
from urllib import request

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
input_dir = '/content/drive/MyDrive/colab_nbs/airbnb/input/'
output_dir = '/content/drive/MyDrive/colab_nbs/airbnb/output/'
os.makedirs(output_dir, exist_ok=True)
print(f"Directory ensures existence: {output_dir}")
images_dir = '/content/drive/MyDrive/colab_nbs/airbnb/images'
os.makedirs(extracted_dir, exist_ok=True)
print(f"Directory ensures existence: {extracted_dir}")

Directory ensures existence: /content/drive/MyDrive/colab_nbs/airbnb/output/
Directory ensures existence: /content/drive/MyDrive/colab_nbs/airbnb/images


In [None]:
pd.set_option('display.max_columns', None)
pd.set_option('future.no_silent_downcasting', True)

In [None]:
df_listings_details = pd.read_csv(input_dir + 'listings_detailed.csv')
df_listings = pd.read_csv(input_dir + 'listings.csv')

In [None]:
selected_columns = ['id','room_type', 'minimum_nights', 'neighbourhood',
   'availability_eoy', 'availability_365', 'picture_url',
    'host_response_rate', 'host_acceptance_rate', 'host_is_superhost', 'host_identity_verified',
    'accommodates', 'bathrooms', 'bedrooms', 'beds',
    'estimated_occupancy_l365d', 'estimated_revenue_l365d',
    'number_of_reviews', 'number_of_reviews_l30d', 'reviews_per_month',
    'review_scores_rating', 'review_scores_value',
    'instant_bookable', 'calculated_host_listings_count', 'price']


In [None]:
def normalize_tf_cols(df, column):
    df[column] = df[column].replace({'t': 1, 'f': 0}).astype(bool)
    return df

In [None]:
def fix_encoding(df_cleaned):
    encoding_map = {}
    for val in df_cleaned['neighbourhood'].unique():
        try:
            clean_val = val.encode("latin1").decode("utf-8", errors="ignore")
            encoding_map[val] = clean_val
        except (UnicodeEncodeError, AttributeError):
            encoding_map[val] = val
    df_cleaned['neighbourhood'] = df_cleaned['neighbourhood'].map(encoding_map)
    return df_cleaned

In [None]:
def data_cleanup(df_1, df_2):
    df_merged_listings = pd.concat([df_listings, df_listings_details], axis=1)
    df_merged_listings = df_merged_listings.loc[:, ~df_merged_listings.columns.duplicated()]
    df_cleaned = df_merged_listings[selected_columns].dropna()
    df_cleaned = df_cleaned[df_cleaned['availability_eoy']> 0]
    df_cleaned = df_cleaned[df_cleaned['availability_365']> 0]
    df_cleaned = df_cleaned[df_cleaned['estimated_occupancy_l365d']> 0]
    df_cleaned['host_response_rate'] = df_cleaned['host_response_rate'].str.replace('%', '', regex=False).astype(float)
    df_cleaned['host_acceptance_rate'] = df_cleaned['host_acceptance_rate'].str.replace('%', '', regex=False).astype(float)
    df_cleaned = normalize_tf_cols(df_cleaned, 'instant_bookable')
    df_cleaned = normalize_tf_cols(df_cleaned, 'host_identity_verified')
    df_cleaned = normalize_tf_cols(df_cleaned, 'host_is_superhost')
    df_cleaned = fix_encoding(df_cleaned)
    df_cleaned.columns = df_cleaned.columns.str.replace('/','_')
    df_cleaned.columns = df_cleaned.columns.str.lower()
    df_cleaned.columns = df_cleaned.columns.str.replace(' ','_')
    return df_cleaned

In [None]:
df_cleaned = data_cleanup(df_listings, df_listings_details)
df_cleaned = df_cleaned.reset_index(drop=True)

In [None]:
def identify_premium_properties(df, threshold=0.5):
    neighborhood_premium_stats = df_cleaned.groupby('neighbourhood').agg({
        'id':'count',
        'price':  lambda x: x.quantile(threshold),
        'review_scores_value': lambda x: x.quantile(threshold)
    })
    neighborhood_premium_stats = neighborhood_premium_stats.rename(
    columns={
        'price': 'price_q_threshold',
        'review_scores_value': 'rating_q_threshold'
    })
    neighborhood_premium_stats = neighborhood_premium_stats.reset_index()
    df_premium = df_cleaned.merge(
        neighborhood_premium_stats[['neighbourhood', 'price_q_threshold', 'rating_q_threshold']],
        on='neighbourhood',
        how='left'
    )
    df_premium['is_premium'] = (
            (df_premium['price'] >= df_premium['price_q_threshold']) &
            (df_premium['review_scores_value'] >= df_premium['rating_q_threshold'])
        )
    df_premium['is_premium'] = df_premium['is_premium'].astype(int)
    return df_premium

In [None]:
df_premium = identify_premium_properties(df_cleaned, 0.5)

In [None]:
df_premium[['id', 'picture_url']].head(2)

Unnamed: 0,id,picture_url
0,51287,https://a0.muscache.com/pictures/25163038/1c4e...
1,169672,https://a0.muscache.com/pictures/c1a1e093-66da...


In [None]:
def download_image_from_url(url):
    context = ssl._create_unverified_context()
    try:
        with request.urlopen(url, context=context) as resp:
            buffer = resp.read()
        stream = BytesIO(buffer)
        img = Image.open(stream)
        return img
    except:
        print("Image Not found exception")
        return None
    print("Image Not found")
    return None

In [None]:
def prepare_image(img, target_size):
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize(target_size, Image.NEAREST)
    return img

In [None]:
sample_df = df_premium.sample(100, random_state=42)

for _, row in tqdm(sample_df.iterrows(), total=len(sample_df)):
    url = row['picture_url']
    listing_id = row['id']

    if pd.isna(url):
        continue

    if row['is_premium'] == 1:
        folder = images_dir + "/premium"
    else:
        folder = images_dir + "/non_premium"

    save_path = f"{folder}/{listing_id}.jpg"
    print(save_path)
    img = download_image_from_url(url)

    if img is None:
        print(img)
        continue;

    resized_img =  prepare_image(img, (300,400))
    resized_img.save(save_path)

  0%|          | 0/100 [00:00<?, ?it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/964206565905955937.jpg


  1%|          | 1/100 [00:01<01:41,  1.03s/it]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1102298634083944389.jpg


  2%|▏         | 2/100 [00:01<01:25,  1.14it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1412226864397489130.jpg


  3%|▎         | 3/100 [00:03<01:52,  1.16s/it]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/977893173421197851.jpg


  4%|▍         | 4/100 [00:03<01:24,  1.13it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1468832232918817916.jpg


  5%|▌         | 5/100 [00:04<01:08,  1.38it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1370861391980058217.jpg


  6%|▌         | 6/100 [00:04<01:00,  1.56it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/956488003103130677.jpg


  7%|▋         | 7/100 [00:04<00:49,  1.87it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/33427950.jpg


  8%|▊         | 8/100 [00:05<00:45,  2.01it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1236285666417455881.jpg


  9%|▉         | 9/100 [00:05<00:41,  2.18it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/2023633.jpg


 10%|█         | 10/100 [00:06<00:36,  2.49it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1589958.jpg


 11%|█         | 11/100 [00:06<00:33,  2.69it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/913409543080789024.jpg


 13%|█▎        | 13/100 [00:06<00:26,  3.34it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1435794393346098115.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/premium/42038043.jpg


 14%|█▍        | 14/100 [00:07<00:25,  3.35it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/15871850.jpg


 15%|█▌        | 15/100 [00:07<00:27,  3.08it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/881993291204129160.jpg


 16%|█▌        | 16/100 [00:07<00:26,  3.20it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1192676874504918556.jpg


 17%|█▋        | 17/100 [00:08<00:34,  2.37it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/558627407177666539.jpg


 19%|█▉        | 19/100 [00:09<00:29,  2.73it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/956602313599110587.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1434686995627172832.jpg


 20%|██        | 20/100 [00:09<00:30,  2.65it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1175375794383673365.jpg


 21%|██        | 21/100 [00:09<00:29,  2.69it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1486832333517933332.jpg


 22%|██▏       | 22/100 [00:10<00:27,  2.87it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1321780728981744999.jpg


 23%|██▎       | 23/100 [00:10<00:27,  2.85it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/999771241757947525.jpg


 24%|██▍       | 24/100 [00:11<00:30,  2.51it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/13465011.jpg


 26%|██▌       | 26/100 [00:11<00:29,  2.54it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/16901546.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/911320357783783296.jpg


 28%|██▊       | 28/100 [00:12<00:23,  3.00it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1051980956818299849.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1221691566248375820.jpg


 29%|██▉       | 29/100 [00:13<00:26,  2.70it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/20968137.jpg


 30%|███       | 30/100 [00:13<00:27,  2.53it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1429498467503360018.jpg


 31%|███       | 31/100 [00:13<00:25,  2.66it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/731729250804895177.jpg


 32%|███▏      | 32/100 [00:14<00:25,  2.63it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1445553358216373740.jpg


 33%|███▎      | 33/100 [00:14<00:22,  2.99it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1276766742662315375.jpg


 34%|███▍      | 34/100 [00:14<00:20,  3.16it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1475303776582160471.jpg


 35%|███▌      | 35/100 [00:15<00:21,  2.96it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/984685022149395272.jpg


 36%|███▌      | 36/100 [00:15<00:25,  2.55it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/39502609.jpg


 37%|███▋      | 37/100 [00:16<00:29,  2.12it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1365898285951274265.jpg


 38%|███▊      | 38/100 [00:16<00:27,  2.26it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1477512720964383197.jpg


 39%|███▉      | 39/100 [00:16<00:24,  2.48it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/927656533300967570.jpg


 40%|████      | 40/100 [00:18<00:39,  1.53it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1305125389268437369.jpg


 41%|████      | 41/100 [00:18<00:31,  1.85it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1257354199535563423.jpg


 42%|████▏     | 42/100 [00:18<00:30,  1.92it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1269430943941687172.jpg


 43%|████▎     | 43/100 [00:19<00:26,  2.17it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/52429687.jpg


 44%|████▍     | 44/100 [00:19<00:30,  1.86it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/891432288580421628.jpg


 45%|████▌     | 45/100 [00:20<00:30,  1.78it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/588963352504958123.jpg


 46%|████▌     | 46/100 [00:21<00:30,  1.77it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/42187036.jpg


 47%|████▋     | 47/100 [00:21<00:27,  1.93it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/23162465.jpg


 48%|████▊     | 48/100 [00:22<00:27,  1.90it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1399940204075102187.jpg


 49%|████▉     | 49/100 [00:22<00:23,  2.15it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/53804731.jpg


 50%|█████     | 50/100 [00:23<00:30,  1.64it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/41005680.jpg


 51%|█████     | 51/100 [00:24<00:34,  1.41it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1377698155278292914.jpg


 52%|█████▏    | 52/100 [00:24<00:30,  1.58it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1444796147318067271.jpg


 53%|█████▎    | 53/100 [00:24<00:24,  1.95it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1472930121083833371.jpg


 54%|█████▍    | 54/100 [00:25<00:19,  2.35it/s]

Image Not found exception
None
/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/15533110.jpg


 55%|█████▌    | 55/100 [00:25<00:18,  2.50it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/659327138159218759.jpg


 56%|█████▌    | 56/100 [00:25<00:17,  2.56it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/17707210.jpg


 57%|█████▋    | 57/100 [00:26<00:14,  2.92it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/39907655.jpg


 58%|█████▊    | 58/100 [00:26<00:13,  3.19it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1249060900412409247.jpg


 59%|█████▉    | 59/100 [00:27<00:18,  2.21it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1047662301759564004.jpg


 60%|██████    | 60/100 [00:28<00:24,  1.61it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/41933968.jpg


 61%|██████    | 61/100 [00:28<00:22,  1.74it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1375894675730588394.jpg


 63%|██████▎   | 63/100 [00:29<00:18,  1.98it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/30721228.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/premium/796347626660284330.jpg


 64%|██████▍   | 64/100 [00:29<00:16,  2.21it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1263574939309463535.jpg


 65%|██████▌   | 65/100 [00:30<00:15,  2.33it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/681169225057555523.jpg


 66%|██████▌   | 66/100 [00:30<00:14,  2.38it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1249051997028257738.jpg


 67%|██████▋   | 67/100 [00:31<00:15,  2.13it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/50389214.jpg


 68%|██████▊   | 68/100 [00:31<00:13,  2.34it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/13402108.jpg


 69%|██████▉   | 69/100 [00:32<00:13,  2.30it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1478971935116628091.jpg


 70%|███████   | 70/100 [00:32<00:12,  2.39it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/618653373034224795.jpg


 71%|███████   | 71/100 [00:32<00:11,  2.52it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/3223934.jpg


 72%|███████▏  | 72/100 [00:33<00:11,  2.52it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1443322399354085701.jpg


 73%|███████▎  | 73/100 [00:33<00:11,  2.45it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/669898.jpg


 74%|███████▍  | 74/100 [00:34<00:11,  2.27it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/923694547962125347.jpg


 75%|███████▌  | 75/100 [00:34<00:11,  2.22it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1127540310652873661.jpg


 76%|███████▌  | 76/100 [00:35<00:12,  1.89it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1032760738873474382.jpg


 77%|███████▋  | 77/100 [00:35<00:11,  1.99it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1062575976373188289.jpg


 78%|███████▊  | 78/100 [00:36<00:09,  2.38it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/965282187428738008.jpg


 79%|███████▉  | 79/100 [00:36<00:07,  2.74it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1405219418750760436.jpg


 80%|████████  | 80/100 [00:36<00:07,  2.52it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1384762468722998035.jpg


 81%|████████  | 81/100 [00:37<00:07,  2.67it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/39250647.jpg


 82%|████████▏ | 82/100 [00:37<00:07,  2.30it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1421841945271739538.jpg


 83%|████████▎ | 83/100 [00:38<00:08,  2.07it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/918514261372412148.jpg


 84%|████████▍ | 84/100 [00:38<00:08,  1.97it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/23206608.jpg


 85%|████████▌ | 85/100 [00:39<00:06,  2.24it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/1161458653209749621.jpg


 86%|████████▌ | 86/100 [00:39<00:06,  2.13it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/41903558.jpg


 87%|████████▋ | 87/100 [00:40<00:05,  2.23it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1162773461272327022.jpg


 88%|████████▊ | 88/100 [00:40<00:05,  2.30it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/791418807869273736.jpg


 89%|████████▉ | 89/100 [00:40<00:04,  2.38it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1300144347861996852.jpg


 90%|█████████ | 90/100 [00:41<00:04,  2.23it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/766512766713762470.jpg


 91%|█████████ | 91/100 [00:41<00:03,  2.42it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/42189674.jpg


 92%|█████████▏| 92/100 [00:42<00:03,  2.38it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/652677409364288108.jpg


 93%|█████████▎| 93/100 [00:42<00:02,  2.54it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/51206431.jpg


 94%|█████████▍| 94/100 [00:43<00:02,  2.13it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1293449409898570457.jpg


 95%|█████████▌| 95/100 [00:43<00:02,  2.38it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/premium/690888483154865051.jpg


 96%|█████████▌| 96/100 [00:43<00:01,  2.46it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1389831309311365517.jpg


 97%|█████████▋| 97/100 [00:44<00:01,  2.14it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/1424555510816756744.jpg


 98%|█████████▊| 98/100 [00:44<00:00,  2.45it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/44027879.jpg


 99%|█████████▉| 99/100 [00:44<00:00,  2.78it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/non_premium/34876910.jpg


100%|██████████| 100/100 [00:45<00:00,  2.20it/s]


In [None]:
import tensorflow as tf
from tensorflow.keras import layers

img_size = (224, 224)
batch_size = 10

dataset = tf.keras.preprocessing.image_dataset_from_directory(
    images_dir,
    image_size=img_size,
    batch_size=batch_size,
    label_mode="binary"
)

Found 99 files belonging to 2 classes.


In [None]:
train_ds = dataset.take(int(len(dataset)*0.8))
val_ds   = dataset.skip(int(len(dataset)*0.8))
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
val_ds   = val_ds.prefetch(tf.data.AUTOTUNE)
len(dataset), len(train_ds), len(val_ds)

(10, 8, 2)

In [None]:
augment = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1)
])

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import models

base = EfficientNetB0(
    include_top=False,
    input_shape=img_size + (3,),
    weights="imagenet"
)

base.trainable = False


In [None]:
inputs = layers.Input(shape=img_size + (3,))
x = augment(inputs)
x = tf.keras.applications.efficientnet.preprocess_input(x)
x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)

x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.4)(x)

outputs = layers.Dense(1, activation="sigmoid")(x)

model = models.Model(inputs, outputs)

In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.array([0, 1]),
    y=[0]* 75 + [1]* 24
)

class_weights = {0: class_weights[0], 1: class_weights[1]}
class_weights

{0: np.float64(0.66), 1: np.float64(2.0625)}

In [None]:
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=8,
    class_weight=class_weights
)

Epoch 1/8
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 1s/step - accuracy: 0.4495 - loss: 0.7559 - val_accuracy: 0.8421 - val_loss: 0.5207
Epoch 2/8
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 977ms/step - accuracy: 0.6526 - loss: 0.6495 - val_accuracy: 0.6316 - val_loss: 0.6433
Epoch 3/8
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 802ms/step - accuracy: 0.6140 - loss: 0.5580 - val_accuracy: 0.8947 - val_loss: 0.4128
Epoch 4/8
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.9210 - loss: 0.4426 - val_accuracy: 0.8947 - val_loss: 0.3795
Epoch 5/8
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 809ms/step - accuracy: 0.8205 - loss: 0.3468 - val_accuracy: 0.7368 - val_loss: 0.5299
Epoch 6/8
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.8171 - loss: 0.4389 - val_accuracy: 0.8947 - val_loss: 0.2476
Epoch 7/8
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

y_true = np.concatenate([y for _, y in val_ds], axis=0)
y_pred = model.predict(val_ds).ravel()
y_class = (y_pred >= 0.46).astype(int)
print(confusion_matrix(y_true, y_class))
print(classification_report(y_true, y_class))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 438ms/step
[[11  3]
 [ 1  4]]
              precision    recall  f1-score   support

         0.0       0.92      0.79      0.85        14
         1.0       0.57      0.80      0.67         5

    accuracy                           0.79        19
   macro avg       0.74      0.79      0.76        19
weighted avg       0.83      0.79      0.80        19



In [None]:
model.save(output_dir + "cnn_premium_detector.keras")

In [None]:
sample_df = df_premium.sample(100, random_state=30)

for _, row in tqdm(sample_df.iterrows(), total=len(sample_df)):
    url = row['picture_url']
    listing_id = row['id']

    if pd.isna(url):
        continue

    if row['is_premium'] == 1:
        folder = images_dir + "/test/premium"
    else:
        folder = images_dir + "/test/non_premium"

    save_path = f"{folder}/{listing_id}.jpg"
    print(save_path)
    img = download_image_from_url(url)

    if img is None:
        print(img)
        continue;

    resized_img =  prepare_image(img, (300,400))
    resized_img.save(save_path)

  0%|          | 0/100 [00:00<?, ?it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/948991807741346465.jpg


  1%|          | 1/100 [00:00<00:31,  3.13it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/14726391.jpg


  2%|▏         | 2/100 [00:00<00:33,  2.94it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/618578894243367561.jpg


  3%|▎         | 3/100 [00:01<00:38,  2.51it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/51477692.jpg


  4%|▍         | 4/100 [00:01<00:36,  2.64it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1481604275518621651.jpg


  5%|▌         | 5/100 [00:02<00:52,  1.83it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/51302449.jpg


  7%|▋         | 7/100 [00:02<00:34,  2.66it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1156264089816674365.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/53160983.jpg


  8%|▊         | 8/100 [00:04<00:59,  1.54it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/22726811.jpg


  9%|▉         | 9/100 [00:04<00:57,  1.57it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/37235991.jpg


 10%|█         | 10/100 [00:05<00:54,  1.65it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/27269930.jpg


 11%|█         | 11/100 [00:05<00:48,  1.82it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1165965288465045652.jpg


 12%|█▏        | 12/100 [00:06<00:54,  1.61it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1271803004262713942.jpg


 13%|█▎        | 13/100 [00:06<00:43,  2.01it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1486807586946115813.jpg


 14%|█▍        | 14/100 [00:06<00:35,  2.43it/s]

Image Not found exception
None
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1465166502161717013.jpg


 15%|█▌        | 15/100 [00:07<00:36,  2.33it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1478254973649137680.jpg


 16%|█▌        | 16/100 [00:07<00:30,  2.71it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1282316335883715400.jpg


 17%|█▋        | 17/100 [00:08<00:40,  2.03it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/767364475139732271.jpg


 18%|█▊        | 18/100 [00:08<00:34,  2.38it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/827402796407955175.jpg


 19%|█▉        | 19/100 [00:08<00:30,  2.68it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1458093513734183323.jpg


 20%|██        | 20/100 [00:09<00:35,  2.25it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1106272829646887625.jpg


 21%|██        | 21/100 [00:10<00:43,  1.80it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1382572270062542981.jpg


 22%|██▏       | 22/100 [00:10<00:36,  2.13it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1175375794383673365.jpg


 23%|██▎       | 23/100 [00:10<00:30,  2.50it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1209527391907353565.jpg


 24%|██▍       | 24/100 [00:11<00:29,  2.61it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/581007685568093925.jpg


 25%|██▌       | 25/100 [00:11<00:33,  2.25it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/927339261593951595.jpg


 26%|██▌       | 26/100 [00:11<00:27,  2.67it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/44027818.jpg


 27%|██▋       | 27/100 [00:12<00:26,  2.74it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/8074675.jpg


 28%|██▊       | 28/100 [00:12<00:28,  2.55it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/814467635208335040.jpg


 29%|██▉       | 29/100 [00:13<00:26,  2.64it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/302499.jpg


 30%|███       | 30/100 [00:13<00:34,  2.02it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1193583575642147874.jpg


 31%|███       | 31/100 [00:14<00:35,  1.95it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1217559907407964467.jpg


 32%|███▏      | 32/100 [00:14<00:31,  2.13it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/22779759.jpg


 33%|███▎      | 33/100 [00:15<00:29,  2.25it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1400606823492026665.jpg


 35%|███▌      | 35/100 [00:15<00:21,  3.03it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/42038043.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/628754117116222184.jpg


 36%|███▌      | 36/100 [00:15<00:19,  3.27it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/26490184.jpg


 38%|███▊      | 38/100 [00:16<00:18,  3.31it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1108128.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1244757980799118246.jpg


 39%|███▉      | 39/100 [00:16<00:21,  2.90it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/797043597716246313.jpg


 40%|████      | 40/100 [00:17<00:26,  2.24it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1155223688248348958.jpg


 41%|████      | 41/100 [00:18<00:32,  1.79it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1424523973794985848.jpg


 43%|████▎     | 43/100 [00:18<00:21,  2.64it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/46354460.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/722238458119883499.jpg


 44%|████▍     | 44/100 [00:19<00:18,  3.00it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/845623878523367364.jpg


 45%|████▌     | 45/100 [00:19<00:20,  2.66it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/50970621.jpg


 46%|████▌     | 46/100 [00:20<00:21,  2.50it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1313860234692330913.jpg


 48%|████▊     | 48/100 [00:20<00:15,  3.42it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1188231119037880707.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/14837473.jpg


 49%|████▉     | 49/100 [00:20<00:16,  3.07it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/917843569566981138.jpg


 50%|█████     | 50/100 [00:21<00:23,  2.15it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/672314223257163050.jpg


 51%|█████     | 51/100 [00:21<00:20,  2.34it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/31180556.jpg


 52%|█████▏    | 52/100 [00:22<00:22,  2.17it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/882372743810613001.jpg


 53%|█████▎    | 53/100 [00:23<00:22,  2.09it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/40157806.jpg


 55%|█████▌    | 55/100 [00:23<00:15,  2.97it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/2750261.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1286148159633244816.jpg


 56%|█████▌    | 56/100 [00:23<00:14,  3.13it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1214622611888718902.jpg


 58%|█████▊    | 58/100 [00:24<00:11,  3.56it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/52966746.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/786877831492700740.jpg


 60%|██████    | 60/100 [00:24<00:11,  3.41it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/876128973819095991.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/787539008753231539.jpg


 61%|██████    | 61/100 [00:25<00:14,  2.69it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1142158650952374467.jpg


 63%|██████▎   | 63/100 [00:25<00:10,  3.55it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/45855087.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/26173181.jpg


 65%|██████▌   | 65/100 [00:26<00:07,  4.81it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1470122141724893631.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/560592172186883310.jpg


 67%|██████▋   | 67/100 [00:26<00:06,  5.18it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1032748262670394226.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/29901052.jpg


 68%|██████▊   | 68/100 [00:26<00:07,  4.49it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/4413284.jpg


 69%|██████▉   | 69/100 [00:27<00:07,  4.14it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/28025153.jpg


 71%|███████   | 71/100 [00:27<00:06,  4.54it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1278335777921591385.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1330551335349505519.jpg


 72%|███████▏  | 72/100 [00:28<00:08,  3.15it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1443322424571517776.jpg


 73%|███████▎  | 73/100 [00:28<00:07,  3.47it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/824479617590648053.jpg


 75%|███████▌  | 75/100 [00:28<00:06,  3.84it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/993571328089628837.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/24295229.jpg


 76%|███████▌  | 76/100 [00:29<00:05,  4.03it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/48134900.jpg


 78%|███████▊  | 78/100 [00:29<00:04,  4.73it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/44027854.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/979416776671651169.jpg


 80%|████████  | 80/100 [00:30<00:05,  3.79it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1485516735869345764.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1206466226819110768.jpg


 82%|████████▏ | 82/100 [00:30<00:05,  3.51it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/40917060.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1031078559343583039.jpg


 83%|████████▎ | 83/100 [00:31<00:08,  1.91it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/45853532.jpg


 85%|████████▌ | 85/100 [00:32<00:06,  2.15it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/731130800716277122.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1459438452081519660.jpg


 86%|████████▌ | 86/100 [00:32<00:05,  2.72it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1227488267118467036.jpg


 87%|████████▋ | 87/100 [00:33<00:04,  2.97it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1397052940849496741.jpg


 88%|████████▊ | 88/100 [00:34<00:06,  1.94it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1307269702802818793.jpg


 89%|████████▉ | 89/100 [00:34<00:05,  2.08it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/5835467.jpg


 90%|█████████ | 90/100 [00:35<00:04,  2.05it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1458888487899624701.jpg


 91%|█████████ | 91/100 [00:36<00:05,  1.55it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/857285508547639308.jpg


 93%|█████████▎| 93/100 [00:36<00:03,  2.33it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/959125902868895082.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/premium/1488366528117917561.jpg


 94%|█████████▍| 94/100 [00:36<00:02,  2.40it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/7962637.jpg


 95%|█████████▌| 95/100 [00:37<00:01,  2.68it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/20854119.jpg


 97%|█████████▋| 97/100 [00:37<00:01,  2.88it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/8049046.jpg
/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/1218675917828964223.jpg


 98%|█████████▊| 98/100 [00:38<00:00,  3.29it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/978051288949733487.jpg


100%|██████████| 100/100 [00:38<00:00,  2.59it/s]

/content/drive/MyDrive/colab_nbs/airbnb/images/test/non_premium/43976150.jpg





In [None]:
def predict_image(path):
    img = Image.open(path).convert("RGB")
    img = img.resize(img_size)

    arr = np.array(img) / 255.0
    arr = np.expand_dims(arr, axis=0)

    prob = model.predict(arr)[0][0]

    print("Path:", path)
    print("Premium probability:", prob)

    if prob >= 0.6:
        print("Prediction → PREMIUM")
        return True
    else:
        print("Prediction → NON-PREMIUM")
        return False

In [None]:
test_dir = '/content/drive/MyDrive/colab_nbs/airbnb/test'
test_folder = test_dir + "/images"
non_premium_count = 0
premium_count = 0
for filename in os.listdir(test_folder):
  full_path = os.path.join(test_folder, filename)
  result = predict_image(full_path)
  if(result is True):
    premium_count = premium_count + 1
  else:
    non_premium_count = non_premium_count + 1
print("Total Premium", premium_count )
print("Total Non-Premium", non_premium_count )

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Path: /content/drive/MyDrive/colab_nbs/airbnb/test/images/29901052.jpg
Premium probability: 0.7225854
Prediction → PREMIUM
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
Path: /content/drive/MyDrive/colab_nbs/airbnb/test/images/302499.jpg
Premium probability: 0.7228488
Prediction → PREMIUM
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
Path: /content/drive/MyDrive/colab_nbs/airbnb/test/images/2750261.jpg
Premium probability: 0.72347444
Prediction → PREMIUM
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
Path: /content/drive/MyDrive/colab_nbs/airbnb/test/images/45853532.jpg
Premium probability: 0.7191277
Prediction → PREMIUM
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
Path: /content/drive/MyDrive/colab_nbs/airbnb/test/images/44027854.jpg
Premium probability: 0.72373205
Prediction → PREMIUM
[1m1/1[0m [32m━━━━━━━━━