<a href="https://colab.research.google.com/github/aditii0077/waterqualitydetection/blob/main/waterquality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install pandas numpy scikit-learn joblib streamlit fpdf sqlite3 seaborn matplotlib pyngrok


[31mERROR: Could not find a version that satisfies the requirement sqlite3 (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for sqlite3[0m[31m
[0m

In [9]:
import pandas as pd
import numpy as np
import joblib
import sqlite3
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset (replace with your actual dataset)
df = pd.read_csv("water_potability.csv")  # Ensure you have this dataset in Colab

# Define features and target
X = df.drop(columns=["Potability"])  # Features
y = df["Potability"]  # Target (1 = Pure, 0 = Impure)

# Train-Test Split
# The split happens BEFORE imputation so that no statistic computed from the
# held-out rows can influence what the model sees during training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Handling missing values
# Column means are learned from the training rows only and then applied to both
# splits; filling with whole-dataset means would leak test information.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Feature Scaling (fit on the training split, reuse the fitted scaler on the test split)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Model Accuracy on the held-out split
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Save the Model and Scaler so the Streamlit app can load them from disk
joblib.dump(model, "water_purity_model.pkl")
joblib.dump(scaler, "scaler.pkl")

print("Model and Scaler saved successfully!")



Model Accuracy: 0.68
Model and Scaler saved successfully!


In [10]:
# Create SQLite Database (sqlite3.connect creates the file if it does not exist yet)
conn = sqlite3.connect("water_purity.db")
try:
    cursor = conn.cursor()

    # Create Table for storing predictions: one row per prediction, holding the
    # nine input parameters and the predicted label. IF NOT EXISTS makes this
    # cell safe to re-run.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS predictions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            pH REAL,
            Hardness REAL,
            Solids REAL,
            Chloramines REAL,
            Sulfate REAL,
            Conductivity REAL,
            OrganicCarbon REAL,
            Trihalomethanes REAL,
            Turbidity REAL,
            Prediction TEXT
        )
    ''')

    conn.commit()
finally:
    # Release the database handle even when the statement above raises.
    conn.close()
print("Database setup completed.")


Database setup completed.


In [11]:
%%writefile app.py

import streamlit as st
import pandas as pd
import numpy as np
import joblib
import sqlite3
from fpdf import FPDF
from PIL import Image
import io

# Load Model & Scaler
# Streamlit re-executes this script from the top on every widget interaction.
# cache_resource keeps the unpickled objects alive across those reruns so the
# files are read once per server process instead of once per slider move.
# show_spinner=False keeps this call from emitting any UI element, because it
# runs before st.set_page_config further down in the script.
@st.cache_resource(show_spinner=False)
def load_model():
    """Load the trained classifier and its scaler from the working directory.

    Returns:
        tuple: ``(model, scaler)`` as unpickled from "water_purity_model.pkl"
        and "scaler.pkl".

    Note:
        joblib.load unpickles the files, which can execute arbitrary code;
        only load artifacts produced by a trusted training run.
    """
    model = joblib.load("water_purity_model.pkl")
    scaler = joblib.load("scaler.pkl")
    return model, scaler

model, scaler = load_model()

# Function to Predict Water Purity
def predict_purity(features):
    """Classify a single water sample.

    Args:
        features: sequence of parameter values for one sample; it is wrapped
            into a one-row batch before being passed to the module-level
            ``scaler`` and ``model``.

    Returns:
        str: "Pure" when the model predicts class 1, otherwise "Impure".
    """
    # Scale the single-row batch with the module-level scaler, then classify it.
    scaled_row = scaler.transform([features])
    predicted_label = model.predict(scaled_row)[0]
    if predicted_label == 1:
        return "Pure"
    return "Impure"

# Function to Store Prediction in Database
def store_prediction(features, result):
    """Append one prediction row to the ``predictions`` table.

    Args:
        features: sequence of the nine parameter values, in the column order
            used by the INSERT statement below.
        result: label text stored in the ``Prediction`` column.

    Raises:
        sqlite3.Error: if the insert fails (for example, the table is missing).
            The connection is closed before the error propagates.
    """
    conn = sqlite3.connect("water_purity.db")
    try:
        cursor = conn.cursor()

        # Placeholders keep the values out of the SQL text.
        cursor.execute('''
            INSERT INTO predictions (pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, OrganicCarbon, Trihalomethanes, Turbidity, Prediction)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (*features, result))

        conn.commit()
    finally:
        # Always release the handle; previously a failed INSERT leaked it.
        conn.close()

# Generate PDF Report
def generate_report(result, features):
    """Render a one-page PDF summarising a prediction.

    Args:
        result: prediction label printed on the "Prediction:" line.
        features: sequence of parameter values, paired positionally with the
            labels in ``params`` below.

    Returns:
        io.BytesIO: in-memory PDF positioned at offset 0.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, txt="Water Purity Analysis Report", ln=True, align='C')
    pdf.ln(10)
    pdf.cell(200, 10, txt=f"Prediction: {result}", ln=True)
    pdf.ln(10)

    params = ["pH", "Hardness", "Solids", "Chloramines", "Sulfate", "Conductivity", "Organic Carbon", "Trihalomethanes", "Turbidity"]
    for param, value in zip(params, features):
        pdf.cell(200, 10, txt=f"{param}: {value}", ln=True)

    # In the legacy `fpdf` package the first argument of output() is a file
    # NAME, so passing a BytesIO object there fails. Asking for the document
    # as an in-memory value works on both the legacy package and fpdf2:
    # the former returns a latin-1 text string, the latter a bytearray.
    raw = pdf.output(dest="S")
    if isinstance(raw, str):
        raw = raw.encode("latin-1")

    pdf_output = io.BytesIO(bytes(raw))
    pdf_output.seek(0)
    return pdf_output

# Streamlit UI
# Page header
st.set_page_config(page_title="Water Purity Assessment", layout="wide")
st.title("💧 Water Purity Assessment")
st.markdown("### Check the purity of water using Machine Learning")

# Sidebar: one slider per model input, followed by the trigger button.
# Slider arguments are (label, min, max, default).
with st.sidebar:
    st.header("Input Water Parameters")
    pH = st.slider("pH Level", 0.0, 14.0, 7.0)
    Hardness = st.slider("Hardness", 0, 300, 150)
    Solids = st.slider("Solids (ppm)", 0, 50000, 20000)
    Chloramines = st.slider("Chloramines", 0.0, 15.0, 7.0)
    Sulfate = st.slider("Sulfate", 0.0, 500.0, 250.0)
    Conductivity = st.slider("Conductivity", 0.0, 1000.0, 500.0)
    Organic_carbon = st.slider("Organic Carbon", 0.0, 30.0, 15.0)
    Trihalomethanes = st.slider("Trihalomethanes", 0.0, 120.0, 60.0)
    Turbidity = st.slider("Turbidity", 0.0, 10.0, 5.0)
    check_requested = st.button("Check Purity")

# Main area: predict, persist, then show the verdict and offer the PDF.
if check_requested:
    features = [
        pH,
        Hardness,
        Solids,
        Chloramines,
        Sulfate,
        Conductivity,
        Organic_carbon,
        Trihalomethanes,
        Turbidity,
    ]
    result = predict_purity(features)

    # Record the inputs and the verdict before rendering anything
    store_prediction(features, result)

    st.subheader(f"Water is **{result}**")
    report_pdf = generate_report(result, features)
    st.download_button(
        label="Download Report",
        data=report_pdf,
        file_name="Water_Purity_Report.pdf",
        mime="application/pdf",
    )



Writing app.py


In [None]:
!pip install pyngrok
from pyngrok import ngrok
!streamlit run app.py &

# Create ngrok tunnel
public_url = ngrok.connect(port=8501)
print("Public URL:", public_url)



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8503[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8503[0m
[34m  External URL: [0m[1mhttp://34.105.123.57:8503[0m
[0m
