In [None]:
import pandas as pd
import numpy as np
# Ensure the file is uploaded to Colab first
# You might need to run the files.upload() command if you haven't already
from google.colab import files
uploaded = files.upload()

Saving Filtered_Video_Games_Data_Top_Genres.csv to Filtered_Video_Games_Data_Top_Genres (1).csv


In [None]:
df = pd.read_csv('Filtered_Video_Games_Data_Top_Genres', header=None)

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# --- Config ---
csv_file = 'Filtered_Video_Games_Data.csv'  # Your CSV file
image_base_folder = 'vg_images'  # Folder with downloaded images
image_column = 'img'  # CSV column with relative image paths
label_column = 'genre'  # Target label column
image_size = (224, 224)  # Input size for MobileNetV2

# --- Load CSV ---
df = pd.read_csv(csv_file)

# Drop rows with missing images or genre
df = df.dropna(subset=[image_column, label_column])

# Filter rows where image file actually exists
df['image_path'] = df[image_column].apply(lambda x: os.path.join(image_base_folder, os.path.basename(x.strip())))
df = df[df['image_path'].apply(os.path.isfile)]

print(f"Number of samples with images: {len(df)}")

# --- Encode labels ---
label_encoder = LabelEncoder()
df['label_enc'] = label_encoder.fit_transform(df[label_column])

# --- Load and preprocess images ---
def load_and_preprocess_image(path):
    img = load_img(path, target_size=image_size)  # Load and resize
    img_array = img_to_array(img)
    img_array = preprocess_input(img_array)  # Preprocessing specific to MobileNetV2
    return img_array

X = np.array([load_and_preprocess_image(p) for p in df['image_path']])
y = to_categorical(df['label_enc'])

# --- Train/test split ---
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.5, stratify=y, random_state=42)

# --- Build model with transfer learning (MobileNetV2 base) ---
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(image_size[0], image_size[1], 3))

# Freeze base model layers
base_model.trainable = False

# Add custom classifier head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
output = Dense(y.shape[1], activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# --- Train ---
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

# --- Evaluate ---
y_pred_prob = model.predict(x_test)
y_pred = np.argmax(y_pred_prob, axis=1)
y_true = np.argmax(y_test, axis=1)

accuracy = accuracy_score(y_true, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_encoder.classes_)
disp.plot(cmap='Reds', xticks_rotation=45)
plt.title("Confusion Matrix: Genre Classification from Images")
plt.tight_layout()
plt.show()


Number of samples with images: 7435


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label_enc'] = label_encoder.fit_transform(df[label_column])


In [None]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('Filtered_Video_Games_Data_Top_Genres.csv', header=None)  # Set header=None if there's no header
image_paths = df[0].values

In [7]:
import csv
import os
import requests

csv_file = 'Filtered_Video_Games_Data.csv'       # Your CSV filename
output_folder = 'vg_images'    # Folder to save images
os.makedirs(output_folder, exist_ok=True)

base_url = 'https://www.vgchartz.com'  # Base URL for images
url_column = 'img'  # The CSV column header for the image path (looks like 'img' from your snippet)

with open(csv_file, newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for i, row in enumerate(reader):
        relative_path = row[url_column].strip()
        full_url = base_url + relative_path

        try:
            response = requests.get(full_url, timeout=10)
            response.raise_for_status()  # raise exception for bad responses

            filename = os.path.basename(relative_path)
            save_path = os.path.join(output_folder, filename)

            with open(save_path, 'wb') as img_file:
                img_file.write(response.content)
            print(f'Downloaded {filename}')
        except Exception as e:
            print(f'Failed to download {full_url}: {e}')

Downloaded full_6510540AmericaFrontccc.jpg
Downloaded full_5563178AmericaFrontccc.jpg
Downloaded 827563ccc.jpg
Downloaded full_9218923AmericaFrontccc.jpg
Downloaded full_4990510AmericaFrontccc.jpg
Downloaded full_call-of-duty-modern-warfare-3_517AmericaFront.jpg
Downloaded full_call-of-duty-black-ops_5AmericaFront.jpg
Downloaded full_1977964AmericaFrontccc.jpg
Downloaded full_4649679AmericaFrontccc.png
Downloaded full_809251AmericaFrontccc.jpg
Downloaded full_4380292AmericaFrontccc.jpg
Downloaded full_call-of-duty-modern-warfare-3_278AmericaFront.jpg
Downloaded 3570928ccc.jpg
Downloaded full_call-of-duty-black-ops_3AmericaFront.jpg
Downloaded full_5257064AmericaFrontccc.jpg
Downloaded full_1182151AmericaFrontccc.jpg
Downloaded full_7661370AmericaFrontccc.jpg
Downloaded full_call-of-duty-modern-warfare-2_1AmericaFront.jpg
Downloaded full_1729769AmericaFrontccc.jpg
Downloaded full_8522439AmericaFrontccc.jpg
Downloaded full_3698558AmericaFrontccc.jpg
Downloaded full_halo-reach_6AmericaFro

KeyboardInterrupt: 