# In [1]:
import os
import pickle

import pandas as pd
from sklearn.linear_model import LinearRegression

# Columns of the word-features DataFrame used as regression inputs.
_FEATURE_COLUMNS = [
    'syllables',
    'characters',
    'vowels',
    'simple_wiki_freq',
    'HIT_count',
    'absTotalMatchCount',
    'relTotalMatchCount',
]

# (model name, feature file name) for each single-corpus model, in the order
# the models are trained and returned.
_SINGLE_CORPORA = (
    ("News_prob_NEW", "News_Train_NEW_Feats1.pkl"),
    ("Wikinews_prob_NEW", "WikiNews_Train_NEW_Feats1.pkl"),
    ("Wikipedia_prob_NEW", "Wikipedia_Train_NEW_Feats1.pkl"),
)

# Name of the model trained on the concatenation of all single corpora.
_COMBINED_NAME = "Combined_prob_NEW"


def _fit_and_save(word_features, classifier_name, output_dir):
    """Fit a LinearRegression on *word_features* and pickle it to *output_dir*."""
    # Fill NaNs on a selection rather than writing back into the frame, so the
    # caller's DataFrame is left untouched and can be reused.
    X = word_features[_FEATURE_COLUMNS].fillna(0).values
    y = word_features['complex_probabilistic'].values

    clf = LinearRegression()
    clf.fit(X, y)

    with open(os.path.join(output_dir, f"{classifier_name}.pkl"), 'wb') as file:
        pickle.dump(clf, file)

    return clf


def train_classifiers(feature_dir="features_NEW", output_dir="lmodel/prob"):
    """Train and pickle one linear-regression model per corpus plus a combined one.

    For each of the News, WikiNews and Wikipedia feature pickles a
    ``LinearRegression`` is fitted on the feature columns (NaNs replaced by 0)
    against the ``complex_probabilistic`` column and written to
    ``<output_dir>/<name>.pkl``. A fourth model is then fitted on the
    row-wise concatenation of the three DataFrames.

    Args:
        feature_dir: Directory containing the ``*_Train_NEW_Feats1.pkl`` files.
        output_dir: Directory the fitted models are pickled into; created if
            it does not exist.

    Returns:
        The fitted models as a list, ordered News, Wikinews, Wikipedia,
        Combined.

    Raises:
        FileNotFoundError: If one of the feature pickles is missing.
        KeyError: If a loaded DataFrame lacks a required column.
    """
    # Without this, the first open(..., 'wb') fails on a fresh checkout.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    classifiers = []
    loaded_frames = []

    for classifier_name, file_name in _SINGLE_CORPORA:
        # NOTE(security): unpickling can execute arbitrary code; only load
        # feature files that come from a trusted source.
        word_features = pd.read_pickle(os.path.join(feature_dir, file_name))
        # Keep the frame so the combined model does not re-read it from disk.
        loaded_frames.append(word_features)
        classifiers.append(
            _fit_and_save(word_features, classifier_name, output_dir)
        )

    combined_features = pd.concat(loaded_frames)
    classifiers.append(
        _fit_and_save(combined_features, _COMBINED_NAME, output_dir)
    )

    return classifiers

# Train all four models when this cell/module runs. train_classifiers() also
# pickles each fitted model to disk; the returned list keeps them in memory.
classifiers = train_classifiers()
