<a href="https://colab.research.google.com/github/amira12345-3/amiratareks.github.io/blob/main/grade9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip -q install pandas scikit-learn gradio joblib openpyxl


In [None]:
from google.colab import files
import io, pandas as pd, os

# Ask the user for a spreadsheet/CSV and load it into `df`, the DataFrame that
# every later cell reads from.
print("‚û°Ô∏è Click 'Choose Files' and upload your Excel (.xlsx/.xls) or CSV.")
uploaded = files.upload()

# `uploaded` is indexed by file name below. If nothing was selected there is no
# first entry, and next(iter(...)) would fail with an opaque StopIteration, so
# stop early with an actionable message instead.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a .csv, .xlsx, or .xls file.")

# Use the first uploaded file
fname = next(iter(uploaded))
if len(uploaded) > 1:
    # Make the "first file wins" rule visible rather than silently ignoring the rest.
    print(f"Multiple files uploaded; only the first one is used: {fname}")

payload = uploaded[fname]
ext = os.path.splitext(fname)[1].lower()

if ext in (".xlsx", ".xls"):
    df = pd.read_excel(io.BytesIO(payload))
elif ext == ".csv":
    # Try utf-8 first (the pandas default), fall back to latin-1 if needed.
    # A fresh BytesIO is built for the retry so reading starts from byte 0 again.
    try:
        df = pd.read_csv(io.BytesIO(payload))
    except UnicodeDecodeError:
        df = pd.read_csv(io.BytesIO(payload), encoding="latin-1")
else:
    raise ValueError("Please upload a .csv, .xlsx, or .xls file.")

# A table with no rows or no columns cannot be used for training; fail here
# rather than with a less obvious error in a later cell.
if df.empty:
    raise ValueError(f"'{fname}' was read successfully but contains no data.")

print("‚úÖ Loaded shape:", df.shape)
df.head(3)


‚û°Ô∏è Click 'Choose Files' and upload your Excel (.xlsx/.xls) or CSV.


In [None]:
# Column the model should learn to predict; everything else becomes a feature.
TARGET = "Future Job"   # <-- change to your target column name

# Fail fast, listing the available columns, if the name above is not in the data.
available_columns = list(df.columns)
assert TARGET in df.columns, f"TARGET '{TARGET}' not found. Columns: {available_columns}"

# Independent copies, so later edits to X / y never write back into df.
y = df[TARGET].copy()
X = df.drop(columns=[TARGET]).copy()

# Short summary of what was selected.
for label, value in (
    ("Features:", list(X.columns)),
    ("Target:", TARGET),
    ("Rows:", len(df)),
):
    print(label, value)

Features: ['ID', 'Zodic Sign', 'Nationality', 'Games', 'Personal Interests', 'Height ', 'Personality', 'Hobbies', 'Age']
Target: Future Job
Rows: 1


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import joblib

# Train a random-forest pipeline on `df`, report a hold-out score when there is
# enough data, then refit on every usable row and save the model bundle.

# TARGET is configured once, in the target-selection cell above. It is not
# re-declared here so that the two cells cannot drift apart.
assert TARGET in df.columns, f"TARGET '{TARGET}' not found. Columns: {list(df.columns)}"

# Rows without a target value cannot be learned from; exclude them without
# modifying df itself.
has_target = df[TARGET].notna()
n_missing_target = int((~has_target).sum())
if n_missing_target:
    print(f"Dropping {n_missing_target} row(s) with a missing '{TARGET}' value.")
if not has_target.any():
    raise ValueError(f"No rows with a non-missing '{TARGET}' value to train on.")

X = df.loc[has_target].drop(columns=[TARGET]).copy()
y = df.loc[has_target, TARGET].copy()

# Heuristic: numeric target with many unique values -> regression; else classification
is_numeric_target = np.issubdtype(y.dtype, np.number)
unique_ratio = y.nunique() / max(1, len(y))
task = "regression" if (is_numeric_target and unique_ratio > 0.05) else "classification"
print("üß† Detected task:", task)

# Numeric columns are passed through unchanged; every other column is one-hot
# encoded. handle_unknown="ignore" lets prediction accept categories that were
# never seen during fitting.
num_cols = X.select_dtypes(include=["number"]).columns.tolist()
cat_cols = X.select_dtypes(exclude=["number"]).columns.tolist()

pre = ColumnTransformer([
    ("num", "passthrough", num_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)
], remainder="drop")

if task == "classification":
    model = RandomForestClassifier(n_estimators=400, random_state=42, n_jobs=-1)
else:
    model = RandomForestRegressor(n_estimators=400, random_state=42, n_jobs=-1)

pipe = Pipeline([("prep", pre), ("model", model)])

# Hold-out evaluation. Below MIN_EVAL_ROWS a 20% split leaves fewer than two
# test rows, which makes the scores (R^2 in particular) meaningless.
MIN_EVAL_ROWS = 10
TEST_FRACTION = 0.2

if len(X) <= 1:
    print("‚ö†Ô∏è Skipping evaluation due to single sample.")
elif len(X) < MIN_EVAL_ROWS:
    print(f"Skipping evaluation: only {len(X)} rows (need at least {MIN_EVAL_ROWS} for a hold-out split).")
else:
    # No stratification: classes with a single member would make a stratified
    # split fail.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_FRACTION, random_state=42
    )
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    if task == "classification":
        acc = accuracy_score(y_test, y_pred)
        # Weighted F1 works for binary and multi-class targets; zero_division=0
        # avoids warnings for classes that never get predicted.
        f1 = f1_score(y_test, y_pred, average="weighted", zero_division=0)
        print(f"Hold-out accuracy: {acc:.3f} | weighted F1: {f1:.3f} ({len(X_test)} test rows)")
    else:
        rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
        r2 = r2_score(y_test, y_pred)
        print(f"Hold-out RMSE: {rmse:.3f} | R^2: {r2:.3f} ({len(X_test)} test rows)")

# Final model: refit on all usable rows (fit() retrains the pipeline from
# scratch, so nothing from the evaluation fit carries over).
pipe.fit(X, y)
if len(X) > 1:
    print("‚úÖ Model trained.")


# Save model + metadata; the prediction UI cell reloads this bundle by these keys.
joblib.dump({
    "pipeline": pipe,
    "task": task,
    "feature_names": X.columns.tolist(),
    "num_cols": num_cols,
    "cat_cols": cat_cols,
    "target": TARGET,
}, "excel_model.joblib")
print("üíæ Saved to excel_model.joblib")

üß† Detected task: classification
‚ö†Ô∏è Skipping evaluation due to single sample.
üíæ Saved to excel_model.joblib


In [None]:
import gradio as gr, joblib
import pandas as pd
# Reload the saved bundle and unpack the pieces the prediction UI needs.
meta = joblib.load("excel_model.joblib")
pipe, feature_names, num_cols, cat_cols, task = (
    meta[key]
    for key in ("pipeline", "feature_names", "num_cols", "cat_cols", "task")
)

def predict_row(*vals):
    """Predict the target for one row of feature values.

    Args:
        *vals: One value per entry of ``feature_names``, in the same order.

    Returns:
        str: The prediction as display text. For classification the class
        probabilities are appended, each labelled with its class when the
        model exposes ``classes_``.

    Raises:
        ValueError: If the number of values differs from the number of
            features (``zip`` would otherwise drop the extras silently).
    """
    if len(vals) != len(feature_names):
        raise ValueError(
            f"Expected {len(feature_names)} values ({list(feature_names)}), got {len(vals)}."
        )

    # Single-row frame whose columns carry the names the pipeline was fitted with.
    row = pd.DataFrame([dict(zip(feature_names, vals))])
    out = pipe.predict(row)[0]

    if task != "classification":
        return f"Prediction (numeric): {out}"

    proba_txt = ""
    estimator = pipe[-1]  # final step of the pipeline
    if hasattr(estimator, "predict_proba"):
        try:
            probs = pipe.predict_proba(row)[0]
            classes = getattr(estimator, "classes_", None)
            if classes is not None and len(classes) == len(probs):
                # Pair every probability with its class so the numbers are readable.
                labelled = ", ".join(
                    f"{cls}={float(prob):.3f}" for cls, prob in zip(classes, probs)
                )
                proba_txt = f" | Probabilities: {labelled}"
            else:
                proba_txt = f" | Probabilities: {[round(float(prob), 3) for prob in probs]}"
        except Exception as exc:
            # Probabilities are a best-effort extra: keep the prediction, but
            # say that they are missing instead of hiding the failure.
            proba_txt = f" | Probabilities unavailable ({type(exc).__name__})"
    return f"Prediction: {out}{proba_txt}"

# One input widget per feature, in training order: a number field for numeric
# columns, a free-text box for everything else.
inputs = [
    gr.Number(label=col)
    if col in num_cols
    else gr.Textbox(label=col, placeholder="text/category")
    for col in feature_names
]

# Wire the widgets to predict_row and show its text result.
demo = gr.Interface(
    fn=predict_row,
    inputs=inputs,
    outputs="text",
    title="Excel ‚Üí AI Predictor",
    description=f"Task: {task.upper()} | Target: {meta['target']}",
)

# share=True requests a public link in addition to the local one.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0042e3319e42408f42.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


