# 🎧 HitSense EDA + Baseline Modeling Notebook
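This notebook explores a small sample of track-level audio features (danceability, energy, tempo, valence, acousticness) and fits a baseline linear regression to predict a hit score derived from popularity.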

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Apply seaborn's "whitegrid" theme to every figure drawn below.
# set_theme() is the preferred entry point; sns.set() is only a legacy alias
# for it that the seaborn docs say may be removed in the future.
sns.set_theme(style="whitegrid")

In [None]:
# Make sure the output directory exists before anything is written into it.
os.makedirs('data', exist_ok=True)

# Small hand-written sample: three rows, five feature columns plus the
# popularity column.
sample_data = dict(
    danceability=[0.5, 0.8, 0.6],
    energy=[0.7, 0.6, 0.8],
    tempo=[120, 130, 110],
    valence=[0.4, 0.6, 0.3],
    acousticness=[0.1, 0.05, 0.2],
    popularity=[50, 80, 30],
)
df = pd.DataFrame(data=sample_data)

# Persist the raw table without the index column, then preview it.
df.to_csv('data/hitsense_raw.csv', index=False)
df.head()

In [None]:
# Quick structural overview of the table: per-column info, null counts and
# descriptive statistics, each under its own heading.
print("Data Info:")
df.info()

missing_per_column = df.isna().sum()
print("\nMissing Values:")
print(missing_per_column)

summary_stats = df.describe()
print("\nSummary Stats:")
print(summary_stats)

In [None]:
# Histogram of the popularity column (10 bins) with a KDE curve overlaid.
plt.figure(figsize=(8, 5))
ax = sns.histplot(data=df, x='popularity', bins=10, kde=True)
ax.set_title("Popularity Distribution")
ax.set_xlabel("Popularity")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Pairwise correlation between all columns, drawn as an annotated heatmap.
correlation_matrix = df.corr()

plt.figure(figsize=(10, 6))
ax = sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm")
ax.set_title("Feature Correlation Matrix")
plt.show()

In [None]:
# Derived columns, added in place:
#   hit_score  - popularity divided by 100
#   vibe_score - element-wise product of danceability and valence
df['hit_score'] = df['popularity'].div(100)
df['vibe_score'] = df['danceability'].mul(df['valence'])

# Save the enriched table (index omitted) and preview the first rows.
df.to_csv('data/hitsense_cleaned.csv', index=False)
df.head()

In [None]:
# Predictor columns fed to the model; the target is the derived hit_score.
feature_columns = ['danceability', 'energy', 'tempo', 'valence', 'acousticness', 'vibe_score']
features = df[feature_columns]
target = df['hit_score']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Report one fitted coefficient per feature column, followed by the intercept.
print("Model Coefficients:")
for feature, coef in zip(feature_columns, model.coef_):
    print(f"{feature}: {coef:.4f}")

print(f"Intercept: {model.intercept_:.4f}")

In [None]:
# Predict the target for the held-out rows.
predictions = model.predict(X_test)

# Mean squared error between the held-out targets and the predictions.
mse = mean_squared_error(y_test, predictions)

# R² is not well-defined with fewer than two test samples: scikit-learn emits
# an UndefinedMetricWarning and returns NaN in that case. Skip the call and
# report the situation explicitly instead of printing a bare "nan".
# `r2` stays defined (as NaN) so any later code reading it keeps working.
n_test = len(y_test)
r2 = r2_score(y_test, predictions) if n_test >= 2 else float("nan")

print("\nModel Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
if n_test >= 2:
    print(f"R² Score: {r2:.4f}")
else:
    print(f"R² Score: undefined (needs at least 2 test samples, got {n_test})")