import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MultiLabelBinarizer

# Columns that hold several comma-separated labels per movie.
MULTI_LABEL_COLUMNS = ('Genre', 'Director', 'Actors')
# Column the regression model learns to predict.
TARGET_COLUMN = 'Rating'


def split_labels(cell):
    """Split a comma-separated cell into a list of clean labels.

    Whitespace around each label is stripped so that ``"A, B"`` and ``"A,B"``
    produce the same labels, and empty fragments are discarded.

    Args:
        cell: The raw cell value. Anything that is not a string (for example
            the NaN pandas uses for a missing cell) is treated as "no labels".

    Returns:
        A list of non-empty, stripped label strings (possibly empty).
    """
    if not isinstance(cell, str):
        return []
    stripped = (piece.strip() for piece in cell.split(','))
    return [label for label in stripped if label]


def encode_multilabel_column(frame, column):
    """Binary-encode one comma-separated column of ``frame``.

    A fresh ``MultiLabelBinarizer`` is used for every call so the returned
    column names always belong to the column being encoded.

    Args:
        frame: DataFrame containing ``column``.
        column: Name of the column holding comma-separated labels.

    Returns:
        A DataFrame sharing ``frame``'s index, with one 0/1 column per label.
        Each column is named ``"<column>_<label>"`` so the same label appearing
        in two source columns (e.g. a person who both directs and acts) does
        not produce duplicate feature names.

    Raises:
        KeyError: If ``column`` is not present in ``frame``.
    """
    binarizer = MultiLabelBinarizer()
    indicator_matrix = binarizer.fit_transform(frame[column].apply(split_labels))
    column_names = [f'{column}_{label}' for label in binarizer.classes_]
    return pd.DataFrame(indicator_matrix, columns=column_names, index=frame.index)


def select_features_and_target(
    frame,
    target_column=TARGET_COLUMN,
    raw_columns=('Title',) + MULTI_LABEL_COLUMNS,
):
    """Separate the model inputs from the value to predict.

    Args:
        frame: DataFrame holding the target, the raw text columns and the
            feature columns.
        target_column: Name of the column to predict.
        raw_columns: Text columns that must never be used as features; names
            that are absent from ``frame`` are ignored.

    Returns:
        A ``(features, target)`` tuple. ``features`` contains only numeric and
        boolean columns, because the linear model cannot be fitted on text.

    Raises:
        KeyError: If ``target_column`` is not present in ``frame``.
    """
    target = frame[target_column]
    candidates = frame.drop(columns=[target_column, *raw_columns], errors='ignore')
    # Any remaining text column (e.g. a free-form description) would make the
    # regression fail, so only numeric/boolean columns are kept.
    features = candidates.select_dtypes(include=['number', 'bool'])
    return features, target


def main(csv_path='movie_data.csv'):
    """Train and evaluate a linear rating model on the movie dataset.

    Prints the first rows of the dataset, then the mean squared error and
    R-squared score measured on a held-out 20% test split.

    Args:
        csv_path: Path to the CSV file; replace the default with the actual
            path to your dataset. The file is expected to contain the
            'Genre', 'Director', 'Actors' and 'Rating' columns.

    Returns:
        The fitted ``LinearRegression`` model.
    """
    # Load the movie dataset
    movie_data = pd.read_csv(csv_path)

    # Explore the dataset
    print(movie_data.head())

    # Convert genres, directors, and actors into binary columns and combine
    # them with the original dataset
    encoded_frames = [
        encode_multilabel_column(movie_data, column)
        for column in MULTI_LABEL_COLUMNS
    ]
    movie_data = pd.concat([movie_data, *encoded_frames], axis=1)

    # Select features and target variable
    features, target = select_features_and_target(movie_data)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Build and train the model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f'Mean Squared Error: {mse}')
    print(f'R-squared: {r2}')

    return model


if __name__ == '__main__':
    main()
