# In [1]:
import os
import pandas as pd
import lightgbm as lgb

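# Output of the cell above: ModuleNotFoundError: No module named 'lightgbm'
# (install the package first, e.g. `pip install lightgbm`, then re-run the cell).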

# In [None]:

# Function to load and preprocess data from CSV files
def load_and_preprocess_data(directory):
    """Load every ``.csv`` file in *directory* and stack them into one dataset.

    For each file the columns ``#candidates``, ``type`` and ``level`` are
    kept as features, and a ``num_voters`` column is added that holds the
    number of rows of that file. The ``rankings`` column is collected
    separately.

    Args:
        directory: Path of the directory that is scanned (non-recursively)
            for files whose name ends with ``.csv``.

    Returns:
        A ``(features, rankings)`` tuple: a DataFrame with the columns
        ``#candidates``, ``type``, ``level`` and ``num_voters``, and a Series
        with the ``rankings`` values. Both are re-indexed from 0 and their
        rows line up with each other.

    Raises:
        ValueError: If *directory* contains no ``.csv`` file.
        KeyError: If a file lacks one of the expected columns.
    """
    feature_list = []
    ranking_list = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the combined dataset reproducible across runs and machines.
    for file_name in sorted(os.listdir(directory)):
        if not file_name.endswith('.csv'):
            continue
        data = pd.read_csv(os.path.join(directory, file_name))
        # Copy so that adding a column does not write into a slice of `data`.
        features = data[['#candidates', 'type', 'level']].copy()
        features['num_voters'] = len(data)
        feature_list.append(features)
        ranking_list.append(data['rankings'])

    if not feature_list:
        # pd.concat([]) would fail with the opaque "No objects to
        # concatenate"; report what is actually wrong instead.
        raise ValueError(f"No .csv files found in directory: {directory!r}")

    # Combine all features and rankings into a single DataFrame / Series
    combined_features = pd.concat(feature_list, ignore_index=True)
    combined_rankings = pd.concat(ranking_list, ignore_index=True)
    return combined_features, combined_rankings

# Function to train a ranking model
def train_ranking_model(features, rankings, group=None):
    """Train a LightGBM model with the ``lambdarank`` objective.

    Args:
        features: Feature matrix passed to ``lgb.Dataset``.
        rankings: Labels passed to ``lgb.Dataset``, one per feature row.
        group: Optional sequence of query sizes (number of consecutive rows
            belonging to each query), as expected by ``lgb.Dataset``. When
            omitted, all rows are treated as one single query.
            NOTE(review): a single query is only a fallback that lets
            training run; pass real per-query sizes (e.g. rows per input
            file) for meaningful rankings, and be aware that LightGBM may
            reject very large individual queries — confirm against the
            LightGBM documentation.

    Returns:
        The trained booster returned by ``lgb.train``.
    """
    if group is None:
        # LambdaRank is a listwise objective: LightGBM needs query boundaries
        # and aborts training when none are supplied.
        group = [len(features)]
    train_data = lgb.Dataset(features, label=rankings, group=group)
    params = {
        'objective': 'lambdarank',
        'metric': 'ndcg',
        'ndcg_at': [5],  # Adjust based on your requirement
        'learning_rate': 0.1,
        'num_leaves': 31,
        'min_data_in_leaf': 20,
    }
    ranker = lgb.train(params, train_data, num_boost_round=100)
    return ranker

# Function to predict rankings for new data
def predict_rankings(model, features):
    """Turn the model's scores into item orderings, best score first.

    Args:
        model: Object exposing ``predict(features)`` that returns either a
            1-D sequence of scores (one per row of *features*) or a 2-D
            sequence with one row of scores per ranking.
        features: Input passed unchanged to ``model.predict``.

    Returns:
        A list of rankings. Each ranking is a list of positional indices
        sorted by descending score; ties keep their original order. A 1-D
        prediction is treated as one ranking, so the result then contains a
        single list.
    """
    rows = list(model.predict(features))
    # A 1-D result iterates as scalars, which have no len(); wrap it so the
    # whole score vector is ranked as a single list instead of crashing.
    if rows and not hasattr(rows[0], '__len__'):
        rows = [rows]
    return [
        sorted(range(len(row)), key=lambda i, row=row: row[i], reverse=True)
        for row in rows
    ]

# Function to save the predicted rankings to a new CSV file
def save_predictions(predictions, output_file_path):
    """Write *predictions* to *output_file_path* as CSV.

    The predictions are wrapped in a DataFrame and written without the
    index column.
    """
    pd.DataFrame(predictions).to_csv(output_file_path, index=False)

# Main function to process all files in a directory and generate predictions
def main(input_directory, output_file_path):
    """Run the whole pipeline: load, train, predict, save.

    The CSV files in *input_directory* are loaded, a ranking model is
    trained on them, the same features are then scored by that model, and
    the resulting rankings are written to *output_file_path*.
    """
    training_features, training_rankings = load_and_preprocess_data(input_directory)
    model = train_ranking_model(training_features, training_rankings)

    # Predictions are made on the very data the model was trained on.
    ranked = predict_rankings(model, training_features)
    save_predictions(ranked, output_file_path)

# Placeholder locations: point input_directory at the folder holding the
# input CSV files and output_file_path at the CSV file to be written.
input_directory = 'path/to/csv_files'
output_file_path = 'predicted_rankings.csv'

# Run the pipeline only when executed directly (script or notebook cell), so
# that importing this file does not start training as a side effect.
if __name__ == "__main__":
    main(input_directory, output_file_path)
