In [1]:
!pip install plotly

Collecting plotly
  Using cached plotly-6.1.2-py3-none-any.whl.metadata (6.9 kB)
Downloading plotly-6.1.2-py3-none-any.whl (16.3 MB)
   ---------------------------------------- 0.0/16.3 MB ? eta -:--:--
    --------------------------------------- 0.3/16.3 MB ? eta -:--:--
   - -------------------------------------- 0.5/16.3 MB 2.1 MB/s eta 0:00:08
   -- ------------------------------------- 1.0/16.3 MB 2.0 MB/s eta 0:00:08
   --- ------------------------------------ 1.3/16.3 MB 1.9 MB/s eta 0:00:09
   --- ------------------------------------ 1.6/16.3 MB 1.7 MB/s eta 0:00:09
   ---- ----------------------------------- 1.8/16.3 MB 1.5 MB/s eta 0:00:10
   ----- ---------------------------------- 2.1/16.3 MB 1.6 MB/s eta 0:00:09
   ----- ---------------------------------- 2.1/16.3 MB 1.6 MB/s eta 0:00:09
   ----- ---------------------------------- 2.4/16.3 MB 1.4 MB/s eta 0:00:11
   ------- -------------------------------- 2.9/16.3 MB 1.4 MB/s eta 0:00:10
   ------- -----------------------

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle

# Load dataset
df = pd.read_csv("WineQT.csv")

# Define features and target: every column except 'quality' and 'Id' is a feature.
X = df.drop(columns=['quality', 'Id'])
y = (df['quality'] >= 6).astype(int)  # Binary target: 1 = Good (quality >= 6), 0 = Bad (quality < 6)

# Split data (fixed random_state keeps the 80/20 split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features: fit the scaler on the training split only, then apply
# the same transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predictions and accuracy on the test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save model, scaler and accuracy. Each file is opened in a `with` block so the
# handle is flushed and closed before anything else tries to read the pickle.
for artifact, path in (
    (model, "wine_model.pkl"),
    (scaler, "scaler.pkl"),
    (accuracy, "accuracy.pkl"),
):
    with open(path, "wb") as fh:
        pickle.dump(artifact, fh)

# Visualize confusion matrix. `labels=[0, 1]` pins the row/column order so the
# Bad/Good tick labels always line up with the matrix cells.
class_names = ["Bad", "Good"]
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    confusion_matrix(y_test, y_pred, labels=[0, 1]),
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
%%writefile app.py

import streamlit as st
import numpy as np
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px

def load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the object has been read.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load
    # artifacts that were produced by a trusted training run.
    with open(path, "rb") as fh:
        return pickle.load(fh)


# Load trained model, scaler and the accuracy value saved alongside them
model = load_pickle("wine_model.pkl")
scaler = load_pickle("scaler.pkl")
accuracy = load_pickle("accuracy.pkl")

# Streamlit App Title
st.title("🍷 Wine Quality Prediction App")

# Sidebar for User Input (arguments are: label, min_value, max_value)
st.sidebar.header("Enter Wine Features")
fixed_acidity = st.sidebar.number_input("Fixed Acidity", 4.0, 16.0, step=0.1)
volatile_acidity = st.sidebar.number_input("Volatile Acidity", 0.1, 2.0, step=0.01)
citric_acid = st.sidebar.number_input("Citric Acid", 0.0, 1.5, step=0.01)
residual_sugar = st.sidebar.number_input("Residual Sugar", 0.5, 15.0, step=0.1)
# Chlorides and Density step in increments finer than two decimals, so an
# explicit display format is given to keep the entered value visible.
chlorides = st.sidebar.number_input("Chlorides", 0.01, 0.2, step=0.001, format="%.3f")
free_sulfur_dioxide = st.sidebar.number_input("Free Sulfur Dioxide", 1.0, 75.0, step=1.0)
total_sulfur_dioxide = st.sidebar.number_input("Total Sulfur Dioxide", 5.0, 200.0, step=5.0)
density = st.sidebar.number_input("Density", 0.9900, 1.0050, step=0.0001, format="%.4f")
pH = st.sidebar.number_input("pH", 2.5, 4.5, step=0.01)
sulphates = st.sidebar.number_input("Sulphates", 0.3, 1.5, step=0.01)
alcohol = st.sidebar.number_input("Alcohol", 8.0, 15.0, step=0.1)

# Button to Predict Quality
if st.sidebar.button("Predict Wine Quality"):
    features = np.array([[fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides,
                          free_sulfur_dioxide, total_sulfur_dioxide, density, pH, sulphates, alcohol]])

    # If the scaler recorded column names when it was fitted, hand it a
    # DataFrame carrying the same names so scikit-learn does not warn about
    # missing feature names. Values and column order are unchanged.
    fitted_names = getattr(scaler, "feature_names_in_", None)
    scaler_input = pd.DataFrame(features, columns=fitted_names) if fitted_names is not None else features
    scaled_features = scaler.transform(scaler_input)

    prediction = model.predict(scaled_features)
    # predict_proba columns follow model.classes_; report the probability of
    # the class that was actually predicted, not always the "Good" class.
    class_probabilities = model.predict_proba(scaled_features)[0]
    predicted_column = list(model.classes_).index(prediction[0])
    confidence = class_probabilities[predicted_column]  # Confidence score

    quality = "Good 🍷" if prediction[0] == 1 else "Bad 🍷"
    st.subheader(f"Predicted Wine Quality: **{quality}**")
    st.write(f"**Confidence Score:** {confidence:.2%}")

    # Feature Importance Plot: absolute value of each logistic-regression coefficient
    importance = abs(model.coef_[0])
    feature_names = ['Fixed Acidity', 'Volatile Acidity', 'Citric Acid', 'Residual Sugar',
                     'Chlorides', 'Free Sulfur Dioxide', 'Total Sulfur Dioxide', 'Density',
                     'pH', 'Sulphates', 'Alcohol']

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.barh(feature_names, importance, color="skyblue")
    ax.set_xlabel("Importance")
    ax.set_title("Feature Importance in Prediction")
    st.pyplot(fig)
    # Release the figure; otherwise pyplot keeps every figure created on each rerun.
    plt.close(fig)

    # Radar Chart for User Input (plots the scaled feature values, not the raw inputs)
    fig_radar = px.line_polar(pd.DataFrame({"Feature": feature_names, "Value": scaled_features[0]}),
                              r="Value", theta="Feature", line_close=True)
    st.plotly_chart(fig_radar)

# Display Model Accuracy
st.sidebar.subheader(f"Model Accuracy: **{accuracy:.2f}**")

In [None]:
# Launch the Streamlit app from app.py (the file written by the %%writefile cell).
# NOTE(review): `streamlit run` presumably keeps serving until interrupted, so
# this cell would not finish on its own — confirm, and interrupt the kernel to stop it.
!streamlit run app.py