In [25]:
import pandas as pd
df = pd.read_csv("camera_dataset.csv")
df.head()

Unnamed: 0,Model,Release date,Max resolution,Low resolution,Effective pixels,Zoom wide (W),Zoom tele (T),Normal focus range,Macro focus range,Storage included,Weight (inc. batteries),Dimensions,Price
0,Agfa ePhoto 1280,1997,1024.0,640.0,0.0,38.0,114.0,70.0,40.0,4.0,420.0,95.0,179.0
1,Agfa ePhoto 1680,1998,1280.0,640.0,1.0,38.0,114.0,50.0,0.0,4.0,420.0,158.0,179.0
2,Agfa ePhoto CL18,2000,640.0,0.0,0.0,45.0,45.0,0.0,0.0,2.0,0.0,0.0,179.0
3,Agfa ePhoto CL30,1999,1152.0,640.0,0.0,35.0,35.0,0.0,0.0,4.0,0.0,0.0,269.0
4,Agfa ePhoto CL30 Clik!,1999,1152.0,640.0,0.0,43.0,43.0,50.0,0.0,40.0,300.0,128.0,1299.0


In [26]:
df.info()
df.head()
df.isna().sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1038 entries, 0 to 1037
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Model                    1038 non-null   object 
 1   Release date             1038 non-null   int64  
 2   Max resolution           1038 non-null   float64
 3   Low resolution           1038 non-null   float64
 4   Effective pixels         1038 non-null   float64
 5   Zoom wide (W)            1038 non-null   float64
 6   Zoom tele (T)            1038 non-null   float64
 7   Normal focus range       1038 non-null   float64
 8   Macro focus range        1037 non-null   float64
 9   Storage included         1036 non-null   float64
 10  Weight (inc. batteries)  1036 non-null   float64
 11  Dimensions               1036 non-null   float64
 12  Price                    1038 non-null   float64
dtypes: float64(11), int64(1), object(1)
memory usage: 105.6+ KB


Unnamed: 0,0
Model,0
Release date,0
Max resolution,0
Low resolution,0
Effective pixels,0
Zoom wide (W),0
Zoom tele (T),0
Normal focus range,0
Macro focus range,1
Storage included,2


In [27]:
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
q3 = df['Price'].quantile(0.75)
df['is_premium'] = (df['Price'] >= q3).astype(int)

df['is_premium'].value_counts()

Unnamed: 0_level_0,count
is_premium,Unnamed: 1_level_1
0,756
1,282


In [28]:
X = df.select_dtypes(include=['number']).drop(columns=['is_premium'])
y = df['is_premium']

In [29]:
X = X.fillna(X.median())

In [30]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [31]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

In [32]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC

rf = RandomForestClassifier(n_estimators=300, random_state=42)
svc = SVC(kernel='rbf', C=3, probability=True, random_state=42)

In [33]:
ensemble = VotingClassifier(
    estimators=[('rf', rf), ('svc', svc)],
    voting='soft'
)

In [34]:
ensemble.fit(X_train, y_train)

In [35]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

pred = ensemble.predict(X_test)

acc = accuracy_score(y_test, pred)
report = classification_report(y_test, pred)
cm = confusion_matrix(y_test, pred)

acc, report, cm

(0.9855769230769231,
 '              precision    recall  f1-score   support\n\n           0       0.99      0.99      0.99       151\n           1       0.97      0.98      0.97        57\n\n    accuracy                           0.99       208\n   macro avg       0.98      0.98      0.98       208\nweighted avg       0.99      0.99      0.99       208\n',
 array([[149,   2],
        [  1,  56]]))

In [36]:
import os
os.makedirs("/content/drive/MyDrive/models", exist_ok=True)

In [37]:
joblib.dump({
    "model": ensemble,
    "scaler": scaler,
    "features": list(X.columns)
}, "/content/drive/MyDrive/models/models_camera.joblib")

['/content/drive/MyDrive/models/models_camera.joblib']