<a href="https://colab.research.google.com/github/azrabano23/autoreel/blob/main/carsmovies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from flask import Flask

In [None]:
# Read the cars-in-movies dataset into `df`.
# "Cars in Movies.csv" must already be uploaded to the Colab session;
# edit the path below if the file lives somewhere else.
df = pd.read_csv(
    'Cars in Movies.csv',
    sep=None,         # let pandas sniff the delimiter instead of assuming commas
    engine='python',  # delimiter sniffing is handled by the Python parser
)


In [None]:
# Parse 'Car Year' as a number (values that cannot be parsed become NaN),
# store it as 'Year', and floor it to a multiple of ten for 'Decade'.
car_year_numeric = pd.to_numeric(df['Car Year'], errors='coerce')
df['Year'] = car_year_numeric
df['Decade'] = car_year_numeric.floordiv(10).mul(10)

In [None]:
# Helper function for trend analysis
def plot_popular_vehicles_by_decade(df):
    """Draw a bar chart of the five most frequent car models in each decade.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Decade' column and a 'Car Model' column. Rows whose
        'Decade' is NaN are dropped by the group-by and therefore not plotted.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    def _decade_label(decade):
        # 'Decade' is typically float (NaN-capable), which would otherwise
        # render as "1990.0s" in the legend.
        try:
            return f"{int(decade)}s"
        except (TypeError, ValueError, OverflowError):
            return f"{decade}s"

    popular_vehicles = df.groupby(['Decade', 'Car Model']).size().reset_index(name='Count')
    popular_vehicles = popular_vehicles.sort_values(by=['Decade', 'Count'], ascending=[True, False])
    # The frame is already ordered by descending count within each decade,
    # so head(5) per group yields each decade's top five in one pass.
    top_per_decade = popular_vehicles.groupby('Decade').head(5)

    plt.figure(figsize=(12, 6))
    drew_any = False
    for decade in top_per_decade['Decade'].unique():
        subset = top_per_decade[top_per_decade['Decade'] == decade]
        plt.bar(subset['Car Model'], subset['Count'], label=_decade_label(decade))
        drew_any = True

    plt.title("Top 5 Popular Cars by Decade")
    plt.xlabel("Car Models")
    plt.ylabel("Frequency in Movies")
    plt.xticks(rotation=45)
    if drew_any:
        # Skip the legend on an empty chart; matplotlib warns when there are no labelled artists.
        plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Load dataset
# Ensure the dataset "Cars in Movies.csv" is uploaded to your Colab environment
# Replace the file path if necessary
df = pd.read_csv('Cars in Movies.csv', sep=None, engine='python')  # sep=None lets pandas detect the delimiter
# Rename the column to match what the code is looking for.
df = df.rename(columns={"Car Make/Model": "Car Model"})
# Re-reading the CSV throws away every derived column, including the 'Decade'
# bucket that the plotting helper and the dashboard callback rely on.
# Rebuild it from 'Car Year' whenever that column is available.
if 'Car Year' in df.columns:
    df['Decade'] = (pd.to_numeric(df['Car Year'], errors='coerce') // 10) * 10

In [None]:
# Normalise the column names used by the recommenders and the dashboard.
# 'Year' is the movie's release year from here on; drop any stale 'Year'
# column first so the rename cannot produce two columns with the same label.
if "Movie Year" in df.columns:
    df = df.drop(columns=["Year"], errors="ignore").rename(columns={"Movie Year": "Year"})

# The car column may still carry its raw name, or may already have been renamed
# to 'Car Model' by an earlier cell. Expose it as 'Car Name' in both cases, and
# keep 'Car Model' available for code that groups on it.
if "Car Make/Model" in df.columns:
    df = df.rename(columns={"Car Make/Model": "Car Name"})
elif "Car Model" in df.columns and "Car Name" not in df.columns:
    df = df.assign(**{"Car Name": df["Car Model"]})

# Car-to-Movie Recommendation
def recommend_movies_by_car(car_model, regex=False):
    """Return the movies whose car entry contains ``car_model``.

    Parameters
    ----------
    car_model : str
        Text to look for in the 'Car Name' column (case-insensitive).
    regex : bool, default False
        When False the text is matched literally, so names containing
        characters such as ``(``, ``+`` or ``.`` behave as expected.
        Pass True to treat ``car_model`` as a regular expression.

    Returns
    -------
    pandas.DataFrame or None
        The 'Movie Title' and 'Year' columns of the matching rows of the
        module-level ``df``, or None (after printing a notice) when any of
        the required columns is absent.
    """
    # 'Year' is part of the result, so it has to be validated as well.
    required_columns = {'Car Name', 'Movie Title', 'Year'}
    if not required_columns.issubset(df.columns):
        print("Required columns are missing!")
        return None
    matches = df['Car Name'].str.contains(car_model, case=False, na=False, regex=regex)
    return df[matches][['Movie Title', 'Year']]

# Movie-to-Car Recommendation
def recommend_cars_by_movie(movie_title, regex=False):
    """Return the cars that appear in movies whose title contains ``movie_title``.

    Parameters
    ----------
    movie_title : str
        Text to look for in the 'Movie Title' column (case-insensitive).
    regex : bool, default False
        When False the text is matched literally, so titles containing
        characters such as ``(``, ``?`` or ``*`` behave as expected.
        Pass True to treat ``movie_title`` as a regular expression.

    Returns
    -------
    pandas.DataFrame or None
        The 'Car Name' and 'Year' columns of the matching rows of the
        module-level ``df``, or None (after printing a notice) when any of
        the required columns is absent.
    """
    # 'Year' is part of the result, so it has to be validated as well.
    required_columns = {'Car Name', 'Movie Title', 'Year'}
    if not required_columns.issubset(df.columns):
        print("Required columns are missing!")
        return None
    matches = df['Movie Title'].str.contains(movie_title, case=False, na=False, regex=regex)
    return df[matches][['Car Name', 'Year']]

# Smoke-test both recommenders; each returns None when required columns are absent,
# in which case nothing is printed for that lookup.
car_recommendations = recommend_movies_by_car("Ford Mustang")
if car_recommendations is not None:
    print("Movies featuring Ford Mustang:", car_recommendations.head(), sep="\n")

movie_recommendations = recommend_cars_by_movie("Fast and Furious")
if movie_recommendations is not None:
    print("Cars in Fast and Furious:", movie_recommendations.head(), sep="\n")


Movies featuring Ford Mustang:
                                Movie Title    Year
0    The Flood: Who Will Save Our Children?  1993.0
139                              Smallville  2001.0
308                                Jake 2.0  2003.0
428                               The Hours  2002.0
429                        Marked for Death  1990.0
Cars in Fast and Furious:
                     Car Name    Year
523331       Packard Twin Six  1924.0
523333           Indian Scout  1924.0
523334   Ford Model T Touring  1924.0
523335  Ford Model T Roadster  1924.0


In [None]:
# Machine Learning Models
# Car-Genre Prediction
ML_COLUMNS = ['Car Name', 'Car Year', 'Brand', 'Genre']


def _first_genre(value):
    """Return the first genre from a list, or from a delimited / stringified list; else None."""
    if isinstance(value, (list, tuple)):
        return value[0] if len(value) > 0 else None
    if isinstance(value, str):
        # Values read from CSV are text, never Python lists. This accepts both
        # "Action, Drama" and "['Action', 'Drama']" style cells.
        # NOTE(review): the dataset's actual genre format is not visible here; confirm.
        head = value.strip().strip('[]').split(',')[0].strip().strip('\'"')
        return head or None
    return None


if set(ML_COLUMNS).issubset(df.columns):
    # Build features and target from ONE frame and drop incomplete rows once,
    # so X and y always have the same length and index.
    ml_data = df[ML_COLUMNS].copy()
    ml_data['Car Year'] = pd.to_numeric(ml_data['Car Year'], errors='coerce')  # model needs numeric input
    ml_data['Genre'] = ml_data['Genre'].apply(_first_genre)  # Take the first genre as target
    ml_data = ml_data.dropna()

    if len(ml_data) < 2:
        # train_test_split cannot produce non-empty train and test sets from fewer than two rows.
        print("Not enough complete rows to train the model!")
    else:
        X = ml_data[['Car Name', 'Car Year', 'Brand']].copy()
        X['Car Name'] = X['Car Name'].astype('category').cat.codes  # Encode car names
        X['Brand'] = X['Brand'].astype('category').cat.codes  # Encode brands
        y = ml_data['Genre']

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # Train model (seeded so the reported accuracy is reproducible)
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)

        # Evaluate model
        y_pred = model.predict(X_test)
        print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
else:
    print("Required columns for Machine Learning are missing!")


Required columns for Machine Learning are missing!


In [None]:
# Interactive Dashboard
# Set up a Dash app on top of an explicit Flask server, so that `server`
# is the object actually serving the dashboard rather than an unused instance.
server = Flask(__name__)
app = dash.Dash(__name__, server=server)

# Build the dropdown choices once. Missing car names are skipped because a NaN
# label/value is not a valid dropdown option, and sorting keeps the list easy to scan.
if 'Car Name' in df.columns:
    car_model_options = [
        {'label': name, 'value': name}
        for name in sorted(df['Car Name'].dropna().unique(), key=str)
    ]
else:
    car_model_options = []

app.layout = html.Div([
    dcc.Dropdown(
        id='car-model',
        options=car_model_options,
        placeholder="Select Car Model"
    ),
    dcc.Graph(id='trend-graph')
])

@app.callback(
    Output('trend-graph', 'figure'),
    [Input('car-model', 'value')]
)
def update_graph(selected_model):
    """Build the per-decade appearance chart for the selected car.

    Parameters
    ----------
    selected_model : str or None
        Current value of the 'car-model' dropdown.

    Returns
    -------
    plotly figure
        A bar chart with one bar per decade. A titled empty chart is returned
        when nothing is selected, when the module-level ``df`` lacks the
        'Car Name' or 'Decade' column, or when no row matches the selection.
    """
    if selected_model is None or 'Car Name' not in df.columns or 'Decade' not in df.columns:
        return px.bar(title="Data Missing")

    filtered_data = df[df['Car Name'] == selected_model]
    if filtered_data.empty:
        return px.bar(title=f"No data for {selected_model}")

    # Collapse to ONE row per decade before plotting. px.bar draws a segment per
    # input row, so plotting raw rows that each carry the decade total would
    # stack to count * count. Aggregating also avoids writing a column onto a
    # slice of `df`.
    if 'Count' in filtered_data.columns:
        # A pre-existing 'Count' column is respected and summed per decade.
        decade_counts = filtered_data.groupby('Decade', as_index=False)['Count'].sum()
    else:
        decade_counts = filtered_data.groupby('Decade').size().reset_index(name='Count')

    fig = px.bar(decade_counts, x='Decade', y='Count', title=f"Trends for {selected_model}")
    return fig

# Run Dash app
if __name__ == '__main__':
    # Newer Dash releases expose `app.run` and have retired `app.run_server`;
    # prefer the former and only fall back for older installations.
    run_app = getattr(app, "run", None)
    if run_app is None:
        run_app = app.run_server
    # use_reloader=False: the auto-reloader re-executes the script, which does not work inside a notebook kernel.
    run_app(debug=True, use_reloader=False)


<IPython.core.display.Javascript object>