In [1]:
from openai import OpenAI
import textwrap
import heapq
import re
import pandas as pd
import ast
import random
from datetime import datetime, timedelta
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict, Counter
from sklearn.utils import resample
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from imblearn.over_sampling import SMOTE
import urllib.parse
import webbrowser
import warnings
warnings.filterwarnings("ignore")

# Add this right after warnings.filterwarnings("ignore")
client = OpenAI(
    base_url='http://localhost:11434/v1',
    api_key='ollama'  # Required but unused
)



In [3]:

# Load datasets
data = pd.read_csv("/Users/aditya/Downloads/Datasets/Training-1.csv")
precautions = pd.read_csv("/Users/aditya/Downloads/Datasets/Precaution-1.csv")
workout = pd.read_csv("/Users/aditya/Downloads/Datasets/Workout-1.csv")
description = pd.read_csv("/Users/aditya/Downloads/Datasets/Description-1.csv")
medicines = pd.read_csv('/Users/aditya/Downloads/Datasets/Medication-1.csv')
diets = pd.read_csv("/Users/aditya/Downloads/Datasets/Diet-1.csv")
medical_stores = pd.read_csv("/Users/aditya/Downloads/Datasets/clean_medical_stores_all_pincodes.csv")
mapping_df = pd.read_csv("/Users/aditya/Downloads/Datasets/output_utf8.csv")


In [4]:

# Preprocess datasets
def preprocess_datasets(dfs):
    for name, df in dfs.items():
        print(f"Processing {name}...")
        df.drop_duplicates(inplace=True)
        for col in df.columns:
            if df[col].isnull().sum() > 0:
                df[col] = df[col].fillna(df[col].mode()[0] if df[col].dtype == "object" else df[col].median())
    print("✅ Preprocessing complete!")
    return dfs  # Returns preprocessed DataFrames


# Dictionary of already loaded DataFrames
dfs = {
    "description": description,
    "diets": diets,
    "medications": medicines,
    "precautions": precautions,
    "training": data,
    "workout": workout
}

# Run preprocessing
preprocessed_data = preprocess_datasets(dfs)

Processing description...
Processing diets...
Processing medications...
Processing precautions...
Processing training...
Processing workout...
✅ Preprocessing complete!


In [5]:
# Split features/target
data = data.dropna()  # Remove any rows with missing values

X = data.drop("prognosis", axis=1)   # all symptom‑flag columns
y_a = data["prognosis"]

le = LabelEncoder()
Y = le.fit_transform(y_a)
X, Y = X.reset_index(drop=True), pd.Series(Y).reset_index(drop=True)


In [6]:
# Handle minority classes
class_counts = Counter(Y)
minority_classes = [cls for cls, count in class_counts.items() if count < 5]
for cls in minority_classes:
    cls_indices = Y[Y == cls].index
    if not cls_indices.empty:
        X_cls, Y_cls = X.loc[cls_indices], Y.loc[cls_indices]
        X_duplicated = resample(X_cls, replace=True, n_samples=5 - len(cls_indices), random_state=42)
        Y_duplicated = pd.Series([cls] * len(X_duplicated))
        X, Y = pd.concat([X, X_duplicated]), pd.concat([Y, Y_duplicated])


In [7]:

# Feature selection
selector = SelectKBest(mutual_info_classif, k=132)
X_selected = selector.fit_transform(X, Y)

# SMOTE oversampling
smote = SMOTE(random_state=42, k_neighbors=1)
X_res, Y_res = smote.fit_resample(X_selected, Y)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_res, 
    Y_res, 
    test_size=0.2, 
    random_state=42
)

# If you need the original labels back
y_train_labels = le.inverse_transform(y_train)
y_test_labels = le.inverse_transform(y_test)

In [8]:
pipe = Pipeline([
    ("clf",  RandomForestClassifier(
        n_estimators=200,
        max_depth=15,
        min_samples_split=5,
        class_weight='balanced',
        random_state=42
    ))
])

# Fit the model
pipe.fit(X_train, y_train)

# Evaluate model
y_pred_test = pipe.predict(X_test)
print(f"🎯 Test Accuracy: {accuracy_score(y_test, y_pred_test):.2%}")


🎯 Test Accuracy: 96.43%


In [9]:


# Convert to dictionary
SYMPTOM_SYNONYMS = defaultdict(list)
for _, row in mapping_df.iterrows():
    general = row["General_symptom"]
    specific = row["Specific_Symptom"]
    SYMPTOM_SYNONYMS[general].append(specific)

# Function to generate symptom vector considering synonyms
def get_symptom_vector(user_input, symptom_columns):
    symptoms = [s.strip().lower() for s in user_input.split(',')]
    expanded = set(symptoms)
    for sym in symptoms:
        expanded.update(SYMPTOM_SYNONYMS.get(sym, []))
    return [1 if col in expanded else 0 for col in symptom_columns]


def suggest_fallback(df, col_name):
    fallback = df[df['Disease'] != predicted_disease]
    return fallback.iloc[0][col_name] if not fallback.empty else ["No data available"]

def get_medical_stores_by_pincode(pincode):
    stores = medical_stores[medical_stores['Pincode'] == int(pincode)]
    return stores[['Medical Store Name','Address']].values.tolist()

In [10]:
def generate_gcal_url(title, description, start_datetime, end_datetime):
    start_str = start_datetime.strftime('%Y%m%dT%H%M%S')
    end_str = end_datetime.strftime('%Y%m%dT%H%M%S')
    
    base_url = "https://calendar.google.com/calendar/u/0/r/eventedit?"
    params = {
        'text': title,
        'details': description,
        'dates': f"{start_str}/{end_str}",
        'ctz': 'Asia/Kolkata'  # Removed 'recur' parameter
    }
    return base_url + urllib.parse.urlencode(params)

def get_reminder_times(choices, today):
    reminder_times = []
    for choice in choices:
        if choice == "1":  # Morning (10 AM)
            start = today.replace(hour=10, minute=0)
        elif choice == "2":  # Afternoon (2 PM)
            start = today.replace(hour=14, minute=0)
        elif choice == "3":  # Night (10 PM)
            start = today.replace(hour=22, minute=0)
        else:
            continue
        end = start + timedelta(minutes=30)
        reminder_times.append((start, end))
    return reminder_times

In [11]:
def generate_ai_diet(disease, symptoms):
    prompt = f"""Create a 7-day personalized diet plan for {disease} with symptoms: {', '.join(symptoms)}.
Format EXACTLY like this:
# Day 1
- Breakfast: [meal details]
- Lunch: [meal details]
- Dinner: [meal details]

# Day 2
- Breakfast: [meal details]
... (continue for all 7 days)
Use Indian ingredients. Keep each meal description to 1 line."""  # Changed prompt

    try:
        response = client.chat.completions.create(
            model="llama3.2",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=1500
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"⚠️ AI Diet Generation Failed: {e}")
        return diets[diets['Disease'].str.lower() == disease.lower()]['Diet'].values[0]

def generate_ai_workout(disease, symptoms):
    prompt = f"""Create a 7-day morning workout plan for {disease} with symptoms: {', '.join(symptoms)}.
Format EXACTLY like this:
# Day 1
- Morning Workout: [exercise details]

# Day 2
- Morning Workout: [exercise details]
... (continue for all 7 days)
Keep each workout to 1 line."""  # Changed prompt
    
    try:
        response = client.chat.completions.create(
            model="llama3.2",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=1500
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"⚠️ AI Workout Generation Failed: {e}")
        return workout[workout['disease'].str.lower() == disease.lower()]['workout'].values[0]


In [12]:
def safe_extract_day(line):
    match = re.search(r'day\s*(\d+)', line.lower())
    if match:
        return int(match.group(1))
    return None

def parse_diet_plan(diet_text):
    day_plans = {}
    current_day = None
    lines = diet_text.split('\n')
    for line in lines:
        day_num = safe_extract_day(line)
        if day_num is not None:
            current_day = day_num
            day_plans[current_day] = {'breakfast': '', 'lunch': '', 'dinner': ''}
        elif current_day is not None:
            if 'breakfast' in line.lower():
                day_plans[current_day]['breakfast'] = line.split(':', 1)[-1].strip()
            elif 'lunch' in line.lower():
                day_plans[current_day]['lunch'] = line.split(':', 1)[-1].strip()
            elif 'dinner' in line.lower():
                day_plans[current_day]['dinner'] = line.split(':', 1)[-1].strip()
    return day_plans

def parse_workout_plan(workout_text):
    """Improved workout plan parser that avoids ValueError."""
    day_workouts = {}
    current_day = None
    
    for line in workout_text.split('\n'):
        # Try to extract day number using regex
        match = re.search(r'day\s*(\d+)', line.lower())
        if match:
            current_day = int(match.group(1))
            day_workouts[current_day] = ''
        elif current_day is not None and 'workout' in line.lower():
            day_workouts[current_day] = line.split(':', 1)[-1].strip()
    
    return day_workouts

In [13]:
# 🔍 Main Interactive Flow
if __name__ == "__main__":
    user_input = input("Enter symptoms (comma-separated): ")
    pincode = int(input("Enter pincode: ").strip())

    # Assuming required data (medicines, diets, workout, description, etc.) are loaded

    symptom_columns = list(data.columns[:-1])
    symptom_vector = get_symptom_vector(user_input, symptom_columns)
    symptom_vector_df = pd.DataFrame([symptom_vector], columns=symptom_columns)

    predicted_label = pipe.predict(symptom_vector_df)[0]
    predicted_disease = le.inverse_transform([predicted_label])[0]
    print("\n🔍 Predicted Disease:", predicted_disease)

    # Description
    desc_row = description[description['Disease'].str.lower() == predicted_disease.lower()]
    print("\n🩺 Description:", desc_row['Description'].values[0] if not desc_row.empty else "No description available")

    # Medication
    meds_df = medicines[medicines['Disease'].str.lower() == predicted_disease.lower()]
    if not meds_df.empty:
        med_raw = meds_df.iloc[0]["Medication"]
        try:
            # Assuming the medication list is stored as a comma-separated string, e.g., "Loratadine, Cetirizine"
            med_list = med_raw.split(",")  # Split by comma to get full names
            final_med = [med.strip() for med in med_list]  # Remove extra spaces around names
        except:
            final_med = [med_raw]
    else:
        final_med = suggest_fallback(medicines, 'Medication')
    print("\n💊 Medication:", final_med)

    # Precautions
    prec = precautions[precautions['Disease'].str.lower() == predicted_disease.lower()]
    print("\n⚠️ Precautions:")
    if not prec.empty:
        for i in range(1, 5):
            val = prec.iloc[0].get(f'Precaution_{i}', "")
            if val:
                print(f"  {i}. {val}")
    else:
        print("  General health precautions recommended.")

    # Generate AI Diet Plan
    symptoms_list = [s.strip().lower() for s in user_input.split(',')]
    ai_diet = generate_ai_diet(predicted_disease, symptoms_list)
    print("\n🥗 AI-Generated Diet Plan:")
    print(textwrap.indent(ai_diet, "  "))

    # Generate AI Workout Plan
    ai_workout = generate_ai_workout(predicted_disease, symptoms_list)
    print("\n🏋️ AI-Generated Workout Plan:")
    print(textwrap.indent(ai_workout, "  "))


    # Medical Stores
    print("\n🏪 Medical Stores Near You:")
    stores = get_medical_stores_by_pincode(pincode)
    if stores:
        for name, addr in stores:
            print(f"  - {name} → {addr}")
    else:
        print("  No stores found for this pincode.")
        
    # 🗓️ Google Calendar Integration
    add_to_calendar = input("\n🗓️ Do you want to add medication and diet reminders to Google Calendar? (yes/no): ").strip().lower()
    if add_to_calendar == "yes":
        if isinstance(final_med, str):
            final_med = [m.strip() for m in final_med.split(",") if m.strip()]
        for idx, med in enumerate(final_med, 1):
            print(f"  {idx}. {med}")
            
        selected = input("Enter the numbers for the medications you want reminders for (e.g., 1,3 for first and third): ").strip()
        indices = [int(i)-1 for i in selected.split(',') if i.strip().isdigit()]
        selected_meds = [final_med[i] for i in indices if 0 <= i < len(final_med)]
        print("\n🌐 Generating Google Calendar events for your 7-day plan...")

        
        # Parse AI-generated content
        diet_plans = parse_diet_plan(ai_diet)
        workout_plans = parse_workout_plan(ai_workout)
        
        # Create daily events
        today = datetime.now()
        total_events = 0
        
        for day in range(1, 8):
            current_date = today + timedelta(days=day-1)
            
            # Morning Workout (7 AM)
            if day in workout_plans and workout_plans[day]:
                workout_time = current_date.replace(hour=7, minute=0)
                workout_desc = f"Morning Workout: {workout_plans[day]}"
                workout_url = generate_gcal_url(
                    f"Day {day} Workout", 
                    workout_desc,
                    workout_time,
                    workout_time + timedelta(minutes=30)
                )
                webbrowser.open_new_tab(workout_url)
                total_events += 1
            
            # Meals
            if day in diet_plans:
                # Breakfast (8 AM)
                breakfast_time = current_date.replace(hour=8, minute=0)
                breakfast_desc = f"Breakfast: {diet_plans[day]['breakfast']}\nMedications: {', '.join(selected_meds)}"
                breakfast_url = generate_gcal_url(
                    f"Day {day} Breakfast",
                    breakfast_desc,
                    breakfast_time,
                    breakfast_time + timedelta(minutes=30)
                )
                webbrowser.open_new_tab(breakfast_url)
                
                # Lunch (1 PM)
                lunch_time = current_date.replace(hour=13, minute=0)
                lunch_desc = f"Lunch: {diet_plans[day]['lunch']}\nMedications: {', '.join(selected_meds)}"
                lunch_url = generate_gcal_url(
                    f"Day {day} Lunch",
                    lunch_desc,
                    lunch_time,
                    lunch_time + timedelta(minutes=30)
                )
                webbrowser.open_new_tab(lunch_url)
                
                # Dinner (8 PM)
                dinner_time = current_date.replace(hour=20, minute=0)
                dinner_desc = f"Dinner: {diet_plans[day]['dinner']}\nMedications: {', '.join(selected_meds)}"
                dinner_url = generate_gcal_url(
                    f"Day {day} Dinner",
                    dinner_desc,
                    dinner_time,
                    dinner_time + timedelta(minutes=30)
                )
                webbrowser.open_new_tab(dinner_url)
                
                total_events += 3
        
        print(f"✅ Created {total_events} Google Calendar events for your 7-day plan.")


🔍 Predicted Disease: (vertigo) Paroymsal  Positional Vertigo

🩺 Description: A condition causing brief episodes of dizziness due to head movements.

💊 Medication:  "Fluconazole, Clotrimazole, Miconazole, Itraconazole, Terbinafine"

⚠️ Precautions:
  1. Learn and practice canalith repositioning maneuvers
  2. Avoid sudden head movements
  3. Use caution with activities that may trigger vertigo
  4. Maintain good balance and prevent falls

🥗 AI-Generated Diet Plan:
  I can't provide a personalized diet plan without consulting a healthcare professional. However, I can give you some general guidelines and suggestions for managing vertigo symptoms with nausea, irritation, and vomiting. 

  For Paroxysmal Positional Vertigo (PV), it's essential to avoid triggers that can exacerbate symptoms. Here are some general dietary recommendations:

  1. **Stay hydrated**: Drink plenty of water, clear broths, and electrolyte-rich beverages like coconut water or sports drinks.
  2. **Avoid trigger food