In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
# Seed NumPy's global random generator so the synthetic actor data
# drawn with np.random.* in later cells is the same on every run.
SEED = 42
np.random.seed(SEED)

In [3]:
# Build a synthetic actor dataset (for illustration only; real-world data
# would have more features) and persist it as a CSV file.
#
# NOTE: np.random.randint excludes its upper bound, so the ranges documented
# below are the values that can actually occur. The three random draws must
# stay in this order: reordering them changes which numbers land in which
# column when the global RNG has been seeded.
N_ACTORS = 100  # single source of truth for the length of every column

actors = [f'Actor_{i}' for i in range(1, N_ACTORS + 1)]  # 'Actor_1' .. 'Actor_100'
debut_years = np.random.randint(1970, 2010, size=N_ACTORS)  # debut years 1970..2009
num_films = np.random.randint(1, 50, size=N_ACTORS)  # film counts 1..49
current_year = 2024
# Age = years elapsed since the debut + a random 20..29 offset
ages = current_year - debut_years + np.random.randint(20, 30, size=N_ACTORS)

# Assemble the columns into a DataFrame
data = pd.DataFrame({
    'Actor_Name': actors,
    'Debut_Year': debut_years,
    'Number_of_Films': num_films,
    'Age': ages
})

# Display the first few rows of the dataset
print(data.head())

# Save to CSV; index=False keeps the row index out of the file
data.to_csv('actor_age_dataset.csv', index=False)

  Actor_Name  Debut_Year  Number_of_Films  Age
0    Actor_1        2008               48   36
1    Actor_2        1998               23   48
2    Actor_3        1984               24   61
3    Actor_4        1977               37   71
4    Actor_5        1990               35   63


In [5]:
# Read the dataset back from the CSV file written earlier
data = pd.read_csv('actor_age_dataset.csv')

# Model inputs are the debut year and the film count; the value to predict is the age
feature_columns = ['Debut_Year', 'Number_of_Films']
X = data.loc[:, feature_columns]
y = data.loc[:, 'Age']

In [6]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state makes the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Linear regression estimator with default settings
model = LinearRegression()

# Fit on the training split. The call is left as the cell's final bare
# expression so the notebook still displays the fitted estimator's repr.
model.fit(X=X_train, y=y_train)

LinearRegression()

In [8]:
# Predict ages for the held-out test rows
y_pred = model.predict(X_test)

# Mean Squared Error between the true and predicted ages
# (expressed in squared years, since the target is an age in years)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Show the first few predictions next to the actual ages.
# zip stops at the shorter input, so a test split with fewer than
# N_PREVIEW rows prints what is available instead of raising IndexError.
N_PREVIEW = 5
for predicted_age, actual_age in zip(y_pred[:N_PREVIEW], y_test.values):
    print(f"Predicted Age: {predicted_age:.2f}, Actual Age: {actual_age}")

Mean Squared Error: 11.206875618547993
Predicted Age: 71.82, Actual Age: 67
Predicted Age: 77.81, Actual Age: 79
Predicted Age: 63.03, Actual Age: 66
Predicted Age: 72.08, Actual Age: 77
Predicted Age: 50.20, Actual Age: 53
