In [35]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

from mobile_analysis import MobilePlanAnalyzer
# Load the raw inputs: the per-customer usage export (semicolon-delimited)
# and the mobile plan catalogue. Both live in the same directory, so the
# location is defined once here and can be changed in a single place.
DATA_DIR = '/workspaces/task_mobile/src/insights/CSV_files'

customer_usage = pd.read_csv(f'{DATA_DIR}/Customer_Usage_Last_12_Months_new.csv', delimiter=";")
mobile_plans = pd.read_csv(f'{DATA_DIR}/Mobile_Plans_Test_Data.csv')

# Parse the date column once (the conversion used to be applied twice).
customer_usage['Date'] = pd.to_datetime(customer_usage['Date'])

# 'Data' is stored as text with a decimal comma; swap it for a dot (literal
# replacement, not a regex) and convert. Unparseable values become NaN.
customer_usage['Data'] = pd.to_numeric(
    customer_usage['Data'].str.replace(',', '.', regex=False),
    errors='coerce',
)

In [36]:
# Typical usage per customer: the median (not the mean, despite the variable
# name) of each usage metric over all of that customer's rows.
customer_averages = (
    customer_usage
    .groupby('CustomerID')[['Data', 'Minute', 'SMS']]
    .agg('median')
)

In [37]:
# Run the project-local plan analysis from the mobile_analysis module.
# NOTE(review): judging by the merged preview shown later in this notebook,
# the result presumably holds one recommended plan per customer (columns such
# as 'Best Plan', 'Provider', 'Total Cost') — confirm against MobilePlanAnalyzer.
analyzer = MobilePlanAnalyzer()
comparison_df = analyzer.analyze_plans()


In [38]:
# Attach the plan analysis result to each customer's median usage profile.
# 'CustomerID' is the groupby index on the usage side; with the default inner
# join only customers present in both frames are kept.
merged_df = customer_averages.merge(comparison_df, on='CustomerID')

merged_df.head()

Unnamed: 0,CustomerID,Data,Minute,SMS,Best Plan,Provider,Total Cost
0,C1001,31.45,620.0,577.0,Unlimited,ProviderB,70.0
1,C1002,25.0,546.0,650.5,Premium,ProviderA,65.31
2,C1003,34.35,758.5,438.0,Unlimited,ProviderB,70.0
3,C1004,32.15,731.0,562.5,Unlimited,ProviderB,70.0
4,C1005,34.8,347.0,370.5,Unlimited,ProviderB,70.0


In [39]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Model inputs: the three usage metrics. Target: the plan name in 'Best Plan'.
X = merged_df.loc[:, ['Data', 'Minute', 'SMS']]
y = merged_df.loc[:, 'Best Plan']

In [40]:
# Encode the plan names as integer class ids.
# The encoder is fitted from the source column rather than from `y` itself so
# that this cell is idempotent: re-running it must not re-encode the already
# encoded integers, which would replace `le.classes_` with ints and make the
# encoder (persisted later in this notebook) unable to map predictions back
# to plan names.
le = LabelEncoder()
y = le.fit_transform(merged_df['Best Plan'])

# Hold out 20% of the customers for evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [41]:

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# RBF-kernel support vector classifier trained on the scaled features.
# The fit call is left as the last expression so the cell displays the estimator.
svm_classifier = SVC(kernel='rbf', random_state=42)
svm_classifier.fit(X_train_scaled, y_train)


In [42]:
from sklearn.metrics import classification_report

# Predicted class ids for the held-out split. They are computed here but not
# reported in this cell; classification_report is imported but not yet called.
y_pred = svm_classifier.predict(X_test_scaled)

# Mean accuracy on each split; comparing the two gives a quick overfitting check.
test_score = svm_classifier.score(X_test_scaled, y_test)
train_score = svm_classifier.score(X_train_scaled, y_train)

print(f"Training accuracy: {train_score:.2f}")
print(f"Testing accuracy: {test_score:.2f}")


Training accuracy: 0.97
Testing accuracy: 0.97


In [54]:
# Directory (relative to the current working directory) that receives the
# persisted model artifacts. exist_ok=True makes the call safe to re-run and
# avoids the gap between checking for the directory and creating it.
models_dir = os.path.join('src', 'insights', 'models')
os.makedirs(models_dir, exist_ok=True)

In [55]:
# Target files for the three artifacts needed to reproduce predictions:
# the classifier, the feature scaler and the label encoder.
model_path = os.path.join(models_dir, 'mobile_plan_model.joblib')
scaler_path = os.path.join(models_dir, 'scaler.joblib')
le_path = os.path.join(models_dir, 'label_encoder.joblib')

# Persist every artifact first, then report where each one was written.
for artifact, destination in (
    (svm_classifier, model_path),
    (scaler, scaler_path),
    (le, le_path),
):
    joblib.dump(artifact, destination)

print(f"Model saved to: {model_path}")
print(f"Scaler saved to: {scaler_path}")
print(f"Label Encoder saved to: {le_path}")

Model saved to: src/insights/models/mobile_plan_model.joblib
Scaler saved to: src/insights/models/scaler.joblib
Label Encoder saved to: src/insights/models/label_encoder.joblib
