In [8]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import joblib
import os

def train_model(data_path='medical_tourism_ranking.csv', model_dir='models', n_neighbors=5):
    """Fit and persist the medical-tourism similarity artifacts.

    Reads the ranking CSV, standardizes six indicator columns, adds a
    weighted 'Medical Tourism Score' column, fits a cosine-distance
    NearestNeighbors index on the standardized matrix, and writes the index,
    the fitted scaler and the scored DataFrame into ``model_dir``.

    NOTE: a later cell in this notebook defines another ``train_model`` (for
    the MICE dataset); once that cell runs it shadows this definition in the
    kernel.

    Args:
        data_path: CSV file to load. Must contain 'countrycode' and every
            column listed in ``features`` below.
        model_dir: Directory that receives 'medical_tourism_model.pkl',
            'scaler.pkl' and 'cities_df.pkl'. Created when missing.
        n_neighbors: Default neighbour count stored on the fitted index.

    Returns:
        None. The results are the three files written to ``model_dir``.

    Raises:
        ValueError: If no rows remain after discarding incomplete records.
        KeyError: If a required column is absent from the CSV.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(model_dir, exist_ok=True)

    features = [
        'Hospital Beds per 1,000',
        'Health Spending per Capita (USD)',
        'GDP per Capita (USD)',
        'Tourist Arrivals per Year',
        'Ease of Doing Business Score',
        # NOTE(review): this column gets a positive weight like the others.
        # If it holds a raw homicide rate (higher = less safe) the weight
        # should probably be negative -- confirm the column's orientation.
        'Safety Index (Homicide Rate)'
    ]
    # One weight per feature, in the same order; the weights sum to 1.0.
    weights = [0.2, 0.2, 0.15, 0.15, 0.15, 0.15]

    df = pd.read_csv(data_path)

    # Discard rows missing the country code OR any model feature: a NaN in a
    # feature column would turn the score into NaN and is rejected by
    # NearestNeighbors.fit. Resetting the index keeps row i of the saved
    # frame aligned with row i of the fitted matrix, so positional neighbour
    # indices map straight back onto the frame.
    df = df.dropna(subset=['countrycode'] + features).reset_index(drop=True)
    if df.empty:
        raise ValueError(
            "No complete rows left in %r after dropping missing values" % (data_path,)
        )

    # Standardize each feature (zero mean, unit variance) so the weights act
    # on comparable scales.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df[features])

    # Weighted sum of z-scores; values are relative and may be negative.
    df['Medical Tourism Score'] = (X_scaled * weights).sum(axis=1)

    # Brute-force search is required for the cosine metric.
    model = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine', algorithm='brute')
    model.fit(X_scaled)

    # Save artifacts
    joblib.dump(model, os.path.join(model_dir, 'medical_tourism_model.pkl'))
    joblib.dump(scaler, os.path.join(model_dir, 'scaler.pkl'))
    df.to_pickle(os.path.join(model_dir, 'cities_df.pkl'))

    print("Model training and saving completed!")

# Inside a notebook kernel __name__ is "__main__", so running this cell trains
# and saves the artifacts immediately. The guard only makes a difference if
# this code is exported to a .py module and imported from elsewhere.
if __name__ == "__main__":
    train_model()

Model training and saving completed!


In [9]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import joblib
import os

def train_model(data_path='mice_destination_ranking.csv', model_dir='models_mice', n_neighbors=5):
    """Fit and persist the MICE destination similarity artifacts.

    Reads the ranking CSV, standardizes six indicator columns, adds a
    weighted 'City Ranking Score' column, fits a cosine-distance
    NearestNeighbors index on the standardized matrix, and writes the index,
    the fitted scaler and the scored DataFrame into ``model_dir``.

    NOTE: an earlier cell in this notebook also defines ``train_model`` (for
    the medical-tourism dataset); running this cell shadows that definition
    in the kernel.

    Args:
        data_path: CSV file to load. Must contain 'countrycode' and every
            column listed in ``features`` below.
        model_dir: Directory that receives 'city_ranking_model.pkl',
            'scaler.pkl' and 'cities_df.pkl'. Created when missing.
        n_neighbors: Default neighbour count stored on the fitted index.

    Returns:
        None. The results are the three files written to ``model_dir``.

    Raises:
        ValueError: If no rows remain after discarding incomplete records.
        KeyError: If a required column is absent from the CSV.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(model_dir, exist_ok=True)

    features = [
        'Ease of Doing Business Score',
        'GDP per Capita (USD)',
        'International Air Passengers',
        'Tourist Arrivals',
        # NOTE(review): this column gets a positive weight like the others.
        # If it holds a raw homicide rate (higher = less safe) the weight
        # should probably be negative -- confirm the column's orientation.
        'Safety Index (Homicide Rate)',
        'MICE Score'
    ]
    # One weight per feature, in the same order; the weights sum to 1.0.
    weights = [0.2, 0.2, 0.15, 0.15, 0.15, 0.15]

    df = pd.read_csv(data_path)

    # Discard rows missing the country code OR any model feature: a NaN in a
    # feature column would turn the score into NaN and is rejected by
    # NearestNeighbors.fit. Resetting the index keeps row i of the saved
    # frame aligned with row i of the fitted matrix, so positional neighbour
    # indices map straight back onto the frame.
    df = df.dropna(subset=['countrycode'] + features).reset_index(drop=True)
    if df.empty:
        raise ValueError(
            "No complete rows left in %r after dropping missing values" % (data_path,)
        )

    # Standardize each feature (zero mean, unit variance) so the weights act
    # on comparable scales.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df[features])

    # Weighted sum of z-scores; values are relative and may be negative.
    df['City Ranking Score'] = (X_scaled * weights).sum(axis=1)

    # Brute-force search is required for the cosine metric.
    model = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine', algorithm='brute')
    model.fit(X_scaled)

    # Save artifacts
    joblib.dump(model, os.path.join(model_dir, 'city_ranking_model.pkl'))
    joblib.dump(scaler, os.path.join(model_dir, 'scaler.pkl'))
    df.to_pickle(os.path.join(model_dir, 'cities_df.pkl'))

    print("Model training and saving completed!")

# Inside a notebook kernel __name__ is "__main__", so running this cell trains
# and saves the artifacts immediately. The guard only makes a difference if
# this code is exported to a .py module and imported from elsewhere.
if __name__ == "__main__":
    train_model()


Model training and saving completed!
