In [8]:

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from difflib import get_close_matches
import joblib

# ---------------------------
# Recommendation Module
# ---------------------------
def recommend(
    user_input: dict,
    df: pd.DataFrame,
    preprocessor
) -> pd.DataFrame:
    """
    Returns top_n car recommendations based on cosine similarity
    between user input and dataset features.
    """
    # Normalize dataset text fields
    text_fields = ['company', 'model', 'fueltype', 'transmissiontype', 'bodystyle']
    for col in text_fields:
        if col in df.columns:
            df[col] = df[col].astype(str).str.strip().str.title()

    # Fuzzy match user inputs for company and model
    for field in ['company', 'model']:
        if field in user_input:
            match = get_close_matches(
                user_input[field].strip().title(),
                df[field].unique().tolist(),
                n=1,
                cutoff=0.6
            )
            if match:
                user_input[field] = match[0]

    # Lowercase user_input keys
    user_input = {k.strip().lower(): v for k, v in user_input.items()}

    # Prepare default template row
    feature_cols = preprocessor.feature_names_in_
    defaults = {}
    for feat in feature_cols:
        if feat in df.columns:
            if pd.api.types.is_numeric_dtype(df[feat]):
                defaults[feat] = df[feat].median()
            else:
                defaults[feat] = df[feat].mode().iat[0]

    # Build user DataFrame
    user_row = defaults.copy()
    for k, v in user_input.items():
        if k in user_row:
            if k in text_fields:
                user_row[k] = str(v).strip().title()
            else:
                user_row[k] = v
    user_df = pd.DataFrame([user_row])

    # Compute car_age if modelyear provided
    if 'modelyear' in user_df.columns and 'car_age' not in user_df.columns:
        current_year = pd.Timestamp.now().year
        user_df['car_age'] = current_year - int(user_df['modelyear'])

    # Transform features
    user_transformed = preprocessor.transform(user_df[feature_cols])
    all_transformed = preprocessor.transform(df[feature_cols])

    # Compute similarities
    sims = cosine_similarity(user_transformed, all_transformed).flatten()
    df['similarity'] = sims
    return df

# ---------------------------
# Main execution
# ---------------------------
if __name__ == '__main__':
    # Load data & artifacts
    df_clean = pd.read_csv('preprocess_used_cars.csv')
    df_clean.columns = df_clean.columns.str.strip().str.lower()
    preprocessor = joblib.load('preprocessor.joblib')

    # Prompt user for inputs
    print("Enter car preferences. Press Enter to skip a field and use default.")
    user_input = {
        'company': input("Company: "),
        'model': input("Model: "),
        'fueltype': input("Fuel Type: "),
        'transmissiontype': input("Transmission Type: "),
        'bodystyle': input("Body Style: "),
    }
    # Numeric inputs
    for field in ['modelyear', 'kilometer', 'price', 'qualityscore']:
        val = input(f"{field.title()} ({'numeric'}): ")
        if val.strip():
            user_input[field] = float(val) if '.' in val else int(val)

    # Get recommendations
    df_results = recommend(user_input, df_clean, preprocessor)
    # Display top 5
    top5 = df_results.sort_values('similarity', ascending=False).head(5)
    print("\nTop 5 Recommendations:")
    print(top5[['company', 'model', 'modelyear', 'kilometer', 'price', 'similarity']].to_string(index=False))


Enter car preferences. Press Enter to skip a field and use default.

Top 5 Recommendations:
      company       model  modelyear  kilometer       price  similarity
Maruti Suzuki Swift Dzire       2016      45408  5.45 Lakhs    0.666862
      Hyundai       Creta       2022      24057 17.35 Lakhs    0.644938
Maruti Suzuki       Swift       2018      25711  5.85 Lakhs    0.607798
         Tata        Hexa       2018      14713 11.25 Lakhs    0.607020
      Hyundai   Grand I10       2018      38520     5 Lakhs    0.606857
