<a href="https://colab.research.google.com/github/amirrehman19/Ai-project/blob/main/Movie_Genre_Classification_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab helper: files.upload() lets the user pick local files and stores them
# in the runtime's working directory (see the "Saving ..." output of this cell).
from google.colab import files
uploaded = files.upload()  # return value is kept in `uploaded`; no later cell shown here reads it


Saving description.txt to description.txt
Saving test_data.txt to test_data.txt
Saving test_data_solution.txt to test_data_solution.txt
Saving train_data.txt to train_data.txt


In [None]:
import pandas as pd

def load_custom_txt(filepath, is_train=True):
    """Load a ' ::: '-delimited text file into a DataFrame.

    Every line is stripped and split on the literal separator ' ::: '.
    Lines that do not contain exactly the expected number of fields are
    skipped.

    Parameters
    ----------
    filepath : str or path-like
        File to read; it is opened as UTF-8 text.
    is_train : bool, default True
        True  -> each line must have 4 fields: id, title, genre, plot.
        False -> each line must have 3 fields: id, title, plot.

    Returns
    -------
    pandas.DataFrame
        One row per accepted line; every value is a string. The columns
        listed above are always present, in that order, even when no line
        was accepted (empty file, or a file parsed with the wrong
        ``is_train`` setting), so column access on the result never fails
        with a KeyError.
    """
    columns = ['id', 'title', 'genre', 'plot'] if is_train else ['id', 'title', 'plot']
    rows = []
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split(' ::: ')
            # Accept only lines whose field count matches the expected layout.
            if len(parts) == len(columns):
                rows.append(dict(zip(columns, parts)))
    # Passing `columns` fixes the schema even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

# Read the three uploaded data files. The solution file is parsed with the
# four-field layout (is_train defaults to True) because it also contains genre.
train_df = load_custom_txt('train_data.txt')
test_solution_df = load_custom_txt('test_data_solution.txt')
test_df = load_custom_txt('test_data.txt', is_train=False)


In [None]:
import re

def preprocess(text):
    """Return ``text`` lower-cased with only a-z, 0-9 and whitespace kept.

    Every other character is deleted (not replaced by a space), so a
    hyphenated word such as "sci-fi" becomes "scifi".
    """
    return re.sub(r'[^a-z0-9\s]', '', text.lower())

# Add a normalised copy of each plot as a new 'clean_plot' column on both frames.
for frame in (train_df, test_df):
    frame['clean_plot'] = frame['plot'].apply(preprocess)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Target labels for the classifier.
y_train = train_df['genre']

# TF-IDF features with max_features=5000 and the built-in English stop-word list.
# The vectorizer is fitted on the training plots only and then applied,
# unchanged, to the test plots.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = vectorizer.fit_transform(train_df['clean_plot'])
X_test = vectorizer.transform(test_df['clean_plot'])


In [None]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier on the TF-IDF features;
# max_iter=1000 sets the solver's iteration cap.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [None]:
# Predict a genre for every test plot and pair each prediction with its row id.
test_preds = model.predict(X_test)
submission_df = test_df[['id']].assign(predicted_genre=test_preds)


In [None]:
# Join predictions to the true labels on 'id' (default inner join, so only ids
# present in both frames are scored), then report the share of exact matches.
merged = submission_df.merge(test_solution_df[['id', 'genre']], on='id')
accuracy = merged['predicted_genre'].eq(merged['genre']).mean()
print(f"Test Accuracy: {accuracy:.2f}")


Test Accuracy: 0.58


In [None]:
# Write the id / predicted_genre table to CSV without the index column, then
# hand the file to the Colab `files` helper (imported in the first cell) for download.
submission_df.to_csv('genre_predictions.csv', index=False)
files.download('genre_predictions.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Function to clean user input
import re

def preprocess(text):
    """Normalise free text: lower-case it, then delete every character that
    is not a lowercase ASCII letter, a digit or whitespace.

    NOTE(review): this re-defines the `preprocess` from an earlier cell with
    the same behaviour.
    """
    disallowed = re.compile(r'[^a-z0-9\s]')
    lowered = text.lower()
    return disallowed.sub('', lowered)

# Single-plot prediction helper
def predict_genre_for_plot(plot_text):
    """Return the predicted genre label for one plot summary.

    The text is cleaned with `preprocess`, vectorised with the module-level
    `vectorizer`, and classified with the module-level `model`; the first
    (and only) element of the prediction array is returned.
    """
    features = vectorizer.transform([preprocess(plot_text)])
    return model.predict(features)[0]

# Interactive demo: read one plot summary typed at the prompt.
# NOTE(review): the output of this cell depends on what the user types, so it
# is not reproducible from code alone on "Run All".
user_plot = input("🎬 Enter the movie plot summary: ")

# Classify the typed text with the helper defined above.
predicted_genre = predict_genre_for_plot(user_plot)

# Print the predicted label on its own line after a blank line.
print("\n🧠 Predicted Genre:", predicted_genre)


🎬 Enter the movie plot summary: In Earth's future, a global crop blight and second Dust Bowl are slowly rendering the planet uninhabitable. Professor Brand (Michael Caine), a brilliant NASA physicist, is working on plans to save mankind by transporting Earth's population to a new home via a wormhole. But first, Brand must send former NASA pilot Cooper (Matthew McConaughey) and a team of researchers through the wormhole and across the galaxy to find out which of three planets could be mankind's new home.

🧠 Predicted Genre: sci-fi


In [None]:
def load_custom_file_for_prediction(filepath):
    """Load an unlabeled 'id ::: title ::: plot' file into a DataFrame.

    Each line is stripped and split on the literal separator ' ::: '; lines
    that do not yield exactly three fields are skipped.

    Parameters
    ----------
    filepath : str or path-like
        File to read; it is opened as UTF-8 text.

    Returns
    -------
    pandas.DataFrame
        Columns 'id', 'title', 'plot' (all strings), one row per accepted
        line. The three columns are present even when no line was accepted,
        so later access to df['plot'] does not fail with a KeyError.
    """
    columns = ['id', 'title', 'plot']
    data = []
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split(' ::: ')
            if len(parts) == len(columns):
                data.append(dict(zip(columns, parts)))
    # Passing `columns` fixes the schema even when `data` is empty.
    return pd.DataFrame(data, columns=columns)

def preprocess(text):
    """Lower-case ``text`` and strip out anything other than a-z, 0-9 and whitespace.

    NOTE(review): re-defines the `preprocess` from earlier cells with the same
    behaviour; this cell has no `import re` of its own and relies on the one
    executed in an earlier cell.
    """
    lowered = text.lower()
    cleaned = re.sub(r'[^a-z0-9\s]', '', lowered)
    return cleaned

def predict_genres(df):
    """Predict a genre for every row of ``df`` and return id/title/prediction.

    ``df`` must have 'id', 'title' and 'plot' columns. It is modified in
    place: 'clean_plot' and 'predicted_genre' columns are added to it. The
    module-level `vectorizer` and `model` are used for the prediction.

    Returns a DataFrame with the columns 'id', 'title', 'predicted_genre'.
    """
    cleaned = df['plot'].apply(preprocess)
    df['clean_plot'] = cleaned
    df['predicted_genre'] = model.predict(vectorizer.transform(cleaned))
    return df[['id', 'title', 'predicted_genre']]

# Classify every plot in the custom file.
# NOTE(review): no cell shown in this notebook creates or uploads
# 'custom_test_plots.txt' — confirm it exists in the working directory.
custom_df = load_custom_file_for_prediction('custom_test_plots.txt')
predicted_df = predict_genres(custom_df)

# One line per movie: title and predicted genre.
for title, genre in zip(predicted_df['title'], predicted_df['predicted_genre']):
    print(f"🎬 {title} → 🧠 Predicted Genre: {genre}")


🎬 The Magical School → 🧠 Predicted Genre: drama
🎬 Battle of the Ages → 🧠 Predicted Genre: documentary
🎬 Love at Sunset → 🧠 Predicted Genre: drama
🎬 The Murder Puzzle → 🧠 Predicted Genre: thriller
🎬 The Robot's Heart → 🧠 Predicted Genre: short
🎬 Laughter Therapy → 🧠 Predicted Genre: comedy
🎬 Ghost in the Mansion → 🧠 Predicted Genre: horror
🎬 Space Voyage → 🧠 Predicted Genre: documentary
🎬 Justice Bound → 🧠 Predicted Genre: drama
🎬 King of the Ring → 🧠 Predicted Genre: documentary


In [None]:
# Ten hand-written sample plots, one per line, in the form
#   id ::: title ::: plot ::: reference label
custom_data = """
1 ::: The Chemist's Code ::: A high school teacher turns into a meth-cooking criminal mastermind. ::: breaking bad
2 ::: Thrones of Fire ::: Noble families battle for control over a mystical land full of dragons and betrayal. ::: game of thrones
3 ::: Time Loop ::: A boy disappears, exposing the town's dark, time-twisted secrets. ::: dark
4 ::: To the Stars ::: Astronauts embark on a dangerous mission through wormholes to save humanity. ::: interstellar
5 ::: Prison Walls ::: A genius engineer gets imprisoned intentionally to save his brother. ::: prison break
6 ::: Heroes United ::: Superheroes team up to fight a cosmic threat trying to destroy the Earth. ::: avengers
7 ::: Love Code ::: A programmer falls for his co-worker while working on a dating app. ::: romantic comedy
8 ::: Haunted Woods ::: A family vacation goes wrong when supernatural forces emerge in the forest. ::: horror
9 ::: The Last Puzzle ::: A detective must solve a series of strange murders that tie to his past. ::: mystery
10 ::: Underdog Glory ::: An ex-convict trains a local kid to win the national boxing title. ::: sports drama
"""

# Write the samples in the three-field 'id ::: title ::: plot' layout. The id
# is re-generated from the line position and the trailing reference label is
# not written. The file is written as UTF-8 explicitly because the prediction
# loader reads it back with encoding='utf-8'; without this the platform's
# default encoding would be used for writing.
with open("custom_test_plots_labeled.txt", "w", encoding="utf-8") as f:
    for i, line in enumerate(custom_data.strip().split("\n"), 1):
        parts = line.strip().split(" ::: ")
        f.write(f"{i} ::: {parts[1]} ::: {parts[2]}\n")

print("✅ Custom genre-labeled test file created!")


✅ Custom genre-labeled test file created!


In [None]:
def load_custom_file_for_prediction(filepath):
    """Read an unlabeled 'id ::: title ::: plot' text file into a DataFrame.

    Lines are stripped and split on ' ::: '; any line that does not produce
    exactly three fields is skipped. The file is opened as UTF-8 text.

    Returns a pandas.DataFrame with the string columns 'id', 'title' and
    'plot'. The columns exist even when no line was accepted, so callers can
    index df['plot'] without hitting a KeyError on an empty result.

    NOTE(review): this re-defines the function of the same name from an
    earlier cell.
    """
    columns = ['id', 'title', 'plot']
    data = []
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split(' ::: ')
            if len(parts) == len(columns):
                data.append(dict(zip(columns, parts)))
    # Passing `columns` fixes the schema even when `data` is empty.
    return pd.DataFrame(data, columns=columns)

def preprocess(text):
    """Lower-case the input, then remove all characters except a-z, 0-9 and whitespace.

    NOTE(review): re-defines the `preprocess` from earlier cells with the same
    behaviour; relies on `re` having been imported in an earlier cell.
    """
    return re.sub(pattern=r'[^a-z0-9\s]', repl='', string=text.lower())

def predict_genres(df):
    """Add predictions to ``df`` and return its id/title/predicted_genre view.

    Expects 'id', 'title' and 'plot' columns. The frame passed in is modified
    in place: a 'clean_plot' column (output of `preprocess`) and a
    'predicted_genre' column (output of the module-level `model` on the
    module-level `vectorizer`'s features) are added.

    NOTE(review): re-defines the function of the same name from an earlier cell.
    """
    df['clean_plot'] = df['plot'].apply(preprocess)
    features = vectorizer.transform(df['clean_plot'])
    predictions = model.predict(features)
    df['predicted_genre'] = predictions
    output_columns = ['id', 'title', 'predicted_genre']
    return df[output_columns]

# Classify the plots written to 'custom_test_plots_labeled.txt' by the previous cell.
custom_df = load_custom_file_for_prediction('custom_test_plots_labeled.txt')
predicted_df = predict_genres(custom_df)

# One line per movie: title and predicted genre.
titles = predicted_df['title']
genres = predicted_df['predicted_genre']
for movie_title, movie_genre in zip(titles, genres):
    print(f"🎬 {movie_title} → 🧠 Predicted Genre: {movie_genre}")


🎬 The Chemist's Code → 🧠 Predicted Genre: comedy
🎬 Thrones of Fire → 🧠 Predicted Genre: drama
🎬 Time Loop → 🧠 Predicted Genre: drama
🎬 To the Stars → 🧠 Predicted Genre: action
🎬 Prison Walls → 🧠 Predicted Genre: drama
🎬 Heroes United → 🧠 Predicted Genre: sci-fi
🎬 Love Code → 🧠 Predicted Genre: comedy
🎬 Haunted Woods → 🧠 Predicted Genre: horror
🎬 The Last Puzzle → 🧠 Predicted Genre: thriller
🎬 Underdog Glory → 🧠 Predicted Genre: sport
