In [None]:
# Movie Genre Classification - Experiments & Testing
# CodSoft ML Internship - Task 1

import pickle
import pandas as pd
import numpy as np
import re

# Restore the fitted model and its preprocessing artifacts from disk
def _load_pickle(path):
    """Return the object deserialized from the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


print("Loading trained model and artifacts...")
model = _load_pickle('../models/model.pkl')
vectorizer = _load_pickle('../artifacts/vectorizer.pkl')
label_encoder = _load_pickle('../artifacts/label_encoder.pkl')

print("✅ Model and artifacts loaded successfully!")

In [None]:
# Text preprocessing function
def preprocess_text(text):
    """Clean and normalize a raw description for vectorization.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is removed, and runs of whitespace are collapsed to a single
    space with leading/trailing whitespace stripped.

    Args:
        text: The raw description. Missing values (``None``, ``NaN``,
            ``pd.NA``) yield an empty string. Other non-string scalars
            (e.g. a numeric cell read from a CSV) are converted with
            ``str()`` before cleaning instead of raising ``AttributeError``.

    Returns:
        The cleaned string (possibly empty).
    """
    if not isinstance(text, str):
        if pd.isna(text):
            return ""
        # Non-string scalars have no .lower(); coerce so cleaning can proceed.
        text = str(text)
    text = text.lower()
    # Drop digits, punctuation and any non-ASCII letters.
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Collapse whitespace left behind by the removals above.
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [None]:
# Prediction function
def predict_genre(description):
    """Predict the genre for a single movie description.

    The description is cleaned with ``preprocess_text``, vectorized with the
    loaded ``vectorizer``, classified by ``model`` and the encoded label is
    mapped back to its name through ``label_encoder``.

    Args:
        description: Raw movie description text.

    Returns:
        A ``(genre, confidence)`` tuple. ``confidence`` is:

        * the highest class probability when the model has ``predict_proba``;
        * otherwise, when the model has ``decision_function``, the highest
          decision score (for a binary model, which yields a single signed
          score, its absolute value);
        * ``None`` when the model exposes neither.

        Probabilities and decision scores are on different scales, so
        confidences from different kinds of model are not comparable.
    """
    # Preprocess
    cleaned = preprocess_text(description)

    # Vectorize (the vectorizer expects an iterable of documents)
    vectorized = vectorizer.transform([cleaned])

    # Predict
    prediction = model.predict(vectorized)[0]

    # Get a confidence score (if the model can provide one)
    if hasattr(model, 'predict_proba'):
        confidence = float(np.max(model.predict_proba(vectorized)[0]))
    elif hasattr(model, 'decision_function'):
        # Flatten first: a binary model returns one score per sample (1-D),
        # a multi-class model one score per class (2-D). Indexing [0] and
        # calling max() on the binary result would fail on a bare scalar.
        scores = np.ravel(model.decision_function(vectorized))
        if scores.size == 1:
            # The sign only selects the class; the magnitude is the margin.
            confidence = float(abs(scores[0]))
        else:
            confidence = float(scores.max())
    else:
        confidence = None

    # Decode genre
    genre = label_encoder.inverse_transform([prediction])[0]

    return genre, confidence

In [None]:
# Hand-written descriptions used to sanity-check the classifier
print(f"\n{'=' * 60}")
print("TESTING WITH CUSTOM MOVIE DESCRIPTIONS")
print('=' * 60)

# Each entry is a dict with a "title" and a "description" key, in that order.
test_examples = [
    dict(title=movie_title, description=movie_description)
    for movie_title, movie_description in (
        (
            "The Space Adventure",
            "A team of astronauts embarks on a dangerous mission to explore a distant planet and encounter alien life forms.",
        ),
        (
            "Love in Paris",
            "A young woman falls in love with a stranger she meets in a coffee shop in Paris, leading to a beautiful romance.",
        ),
        (
            "The Dark Mystery",
            "A detective investigates a series of murders in a small town, uncovering dark secrets and shocking truths.",
        ),
        (
            "Laugh Out Loud",
            "A clumsy waiter causes chaos in a fancy restaurant with hilarious consequences and funny situations.",
        ),
        (
            "The Final Battle",
            "Warriors prepare for an epic battle against evil forces threatening to destroy their kingdom with magical powers.",
        ),
    )
]

In [None]:
# Classify every hand-written example and report the result
for number, movie in enumerate(test_examples, start=1):
    summary = movie['description']

    # Show what is being classified before running the model
    print(f"\n{number}. {movie['title']}")
    print(f"   Description: {summary[:80]}...")

    genre, confidence = predict_genre(summary)

    print(f"   ➜ Predicted Genre: {genre}")
    # Confidence is None when the model exposes no score
    if confidence is not None:
        print(f"   ➜ Confidence: {confidence:.4f}")
    print("-" * 60)

In [None]:
# Test on actual test data (if available)
print("\n" + "="*60)
print("PREDICTIONS ON TEST DATASET")
print("="*60)

try:
    # NOTE(review): parsed with read_csv defaults (comma-separated, first row
    # as header) — confirm the .txt file really has that layout.
    test_data = pd.read_csv('../data/test_data_solution.txt')
    print(f"\nTest data loaded: {len(test_data)} samples")

    # Resolve the columns once instead of re-checking them for every row.
    # An already-cleaned column is preferred over the raw description.
    description_col = next(
        (col for col in ('Cleaned_Description', 'Description') if col in test_data.columns),
        None,
    )
    if description_col is None and len(test_data) > 0:
        raise KeyError(
            "Test data needs a 'Cleaned_Description' or 'Description' column; "
            f"found columns: {list(test_data.columns)}"
        )
    has_title = 'Title' in test_data.columns

    # Make predictions on first 10 samples (with confidence, for inspection)
    print("\nSample Predictions:")
    for idx in range(min(10, len(test_data))):
        row = test_data.iloc[idx]
        title = row['Title'] if has_title else f"Movie {idx+1}"

        genre, confidence = predict_genre(row[description_col])

        print(f"\n{idx+1}. {title}")
        print(f"   Predicted Genre: {genre}")
        if confidence is not None:
            print(f"   Confidence: {confidence:.4f}")

    # Make predictions on all test data.
    # One batched transform/predict replaces a per-row predict_genre() call,
    # which also computed a confidence that was thrown away.
    print("\n\nGenerating predictions for all test samples...")
    if len(test_data) > 0:
        cleaned_descriptions = [preprocess_text(text) for text in test_data[description_col]]
        encoded_predictions = model.predict(vectorizer.transform(cleaned_descriptions))
        test_predictions = list(label_encoder.inverse_transform(encoded_predictions))
    else:
        # Estimators reject empty input; an empty frame simply gets no predictions.
        test_predictions = []

    # Save predictions
    test_data['Predicted_Genre'] = test_predictions
    test_data.to_csv('../data/test_predictions.csv', index=False)
    print("✅ Predictions saved to: ../data/test_predictions.csv")

except FileNotFoundError:
    print("\n⚠️  Test data file not found. Skipping test predictions.")

In [None]:
# Interactive prediction (disabled by default)
print(f"\n{'=' * 60}")
print("INTERACTIVE PREDICTION MODE")
print('=' * 60)
print(
    "\nYou can now test with your own movie descriptions!",
    "(In Jupyter notebook, uncomment the code below to enable)",
    sep="\n",
)

# The prompt loop is kept inert inside a string literal so the cell never
# blocks on input(); remove the surrounding triple quotes to enable it.
"""
while True:
    description = input("\nEnter movie description (or 'quit' to exit): ")
    if description.lower() == 'quit':
        break
    
    genre, confidence = predict_genre(description)
    print(f"\n➜ Predicted Genre: {genre}")
    if confidence is not None:
        print(f"➜ Confidence: {confidence:.4f}")
"""

# Closing summary and a usage reminder
print(
    "\n✅ Experiments completed!",
    "\nYou can now use the model to predict genres for any movie description.",
    "\nUsage:",
    "  genre, confidence = predict_genre('Your movie description here')",
    sep="\n",
)