# ***Prompt for file and load data***

In [19]:
from google.colab import files
import io

uploaded = files.upload()

Saving winequality-combined.csv to winequality-combined (3).csv


# ***Load data***

In [25]:
import pandas as pd

# Extract filename
filename = list(uploaded.keys())[0]

#    Load CSV from either:
#      - a filename (string path)
#      - an uploaded file from Colab's files.upload() dict
#      - a Flask file object (from request.files)

if isinstance(uploaded[filename], str):
  # Assume it's a file path
  df_wine = pd.read_csv(uploaded[filename], sep="\t")
elif hasattr(uploaded[filename], 'read'):
  # Flask's file object or BytesIO
  df_wine = pd.read_csv(io.BytesIO(uploaded[filename].read()), sep="\t")
elif isinstance(uploaded[filename], bytes):
  # Bytes directly (Colab uploaded dict value)
  df_wine = pd.read_csv(io.BytesIO(uploaded[filename]), sep="\t")
else:
  raise ValueError("Unsupported file source type.")

print(df_wine.head())


   fixed_acidity  volatile_acidity  citric_acid  residual_sugar  chlorides  \
0            7.0              0.27         0.36            20.7      0.045   
1            6.3              0.30         0.34             1.6      0.049   
2            8.1              0.28         0.40             6.9      0.050   
3            7.2              0.23         0.32             8.5      0.058   
4            7.2              0.23         0.32             8.5      0.058   

   free_sulfur_dioxide  total_sulfur_dioxide  density    pH  sulphates  \
0                 45.0                 170.0   1.0010  3.00       0.45   
1                 14.0                 132.0   0.9940  3.30       0.49   
2                 30.0                  97.0   0.9951  3.26       0.44   
3                 47.0                 186.0   0.9956  3.19       0.40   
4                 47.0                 186.0   0.9956  3.19       0.40   

   alcohol  quality  
0      8.8        6  
1      9.5        6  
2     10.1        6 

# ***Split the data for training and testing***

In [26]:
from sklearn.model_selection import train_test_split

train_cols = [
    "fixed_acidity","volatile_acidity","citric_acid","residual_sugar",
    "chlorides","free_sulfur_dioxide","total_sulfur_dioxide","density",
    "pH","sulphates","alcohol"
]

x = df_wine[train_cols] # Use all features to train model
y = df_wine['quality'] # "quality" is our target

x_train, x_test, y_train, y_test = train_test_split(x,  y, stratify=y, test_size=0.2, random_state=5)


# ***Create and fit Logistic Regression***

In [27]:
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Pipeline: SMOTE -> Scale -> Logistic Regression
pipe = Pipeline(steps=[
    ("smote", SMOTE(random_state=42, k_neighbors=2)),
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(
        C=100.0,
        penalty="l1",
        solver="saga",
        tol=0.001,
        warm_start=True,
        max_iter=2000
    ))
])

pipe.fit(x_train, y_train)

# Perform predictions and evaluate results
y_pred = pipe.predict(x_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Classification report:\n{classification_report(y_test, y_pred)}")


Accuracy: 0.33153846153846156
Classification report:
              precision    recall  f1-score   support

           3       0.01      0.17      0.01         6
           4       0.12      0.49      0.19        43
           5       0.56      0.42      0.48       428
           6       0.59      0.27      0.37       567
           7       0.30      0.29      0.29       216
           8       0.08      0.36      0.13        39
           9       0.04      1.00      0.07         1

    accuracy                           0.33      1300
   macro avg       0.24      0.43      0.22      1300
weighted avg       0.50      0.33      0.38      1300



# ***Save features and target as Pickle files***

In [28]:
import pickle

# Save the model to a pickle file
with open('lr.pkl', 'wb') as f:
  pickle.dump(pipe, f)

# Save the scaler to a pickle file
#with open('scaler.pkl', 'wb') as f:
#  pickle.dump(scaler, f)

# Download all pickle files
from google.colab import files
files.download("lr.pkl")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>