In [1]:
import pandas as pd
import numpy as np

# Create synthetic dataset
np.random.seed(42)
n = 300

df = pd.DataFrame({
    'project_id': range(1, n + 1),
    'roi_score': np.random.uniform(0, 1, n),  # Return on investment
    'urgency_score': np.random.randint(1, 11, n),
    'gpu_hours_requested': np.random.randint(10, 1000, n),
    'team_size': np.random.randint(2, 15, n),
    'project_type': np.random.choice(['AI', 'Logistics', 'Retail', 'Research'], n)
})

# Priority label based on rules
def label_priority(row):
    if row['roi_score'] > 0.7 and row['urgency_score'] >= 8:
        return 'High'
    elif row['roi_score'] > 0.4:
        return 'Medium'
    else:
        return 'Low'

df['priority'] = df.apply(label_priority, axis=1)
df.to_csv("greenland_dataset.csv", index=False)
df.head()


Unnamed: 0,project_id,roi_score,urgency_score,gpu_hours_requested,team_size,project_type,priority
0,1,0.37454,4,94,11,Logistics,Low
1,2,0.950714,3,87,7,Retail,Medium
2,3,0.731994,7,466,5,Logistics,Medium
3,4,0.598658,8,887,4,Logistics,Medium
4,5,0.156019,4,10,5,Research,Low


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load dataset
df = pd.read_csv("greenland_dataset.csv")

# Encode categorical features
le = LabelEncoder()
df['project_type_encoded'] = le.fit_transform(df['project_type'])
df['priority_encoded'] = le.fit_transform(df['priority'])

# Features and labels
X = df[['roi_score', 'urgency_score', 'gpu_hours_requested', 'team_size', 'project_type_encoded']]
y = df['priority_encoded']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       1.00      1.00      1.00        24
           2       0.97      1.00      0.98        32

    accuracy                           0.98        60
   macro avg       0.99      0.92      0.95        60
weighted avg       0.98      0.98      0.98        60



In [4]:
import joblib

# Save model
joblib.dump(model, "greenland_priority_model.pkl")

# Save label encoder (optional)
joblib.dump(le, "label_encoder.pkl")


['label_encoder.pkl']

In [6]:
le = LabelEncoder()
le.fit(df['project_type'])


In [7]:
le.transform(['AI'])


array([0])

In [8]:
# BEFORE fitting the encoder, manually ensure all categories are seen
all_possible_types = ['AI', 'Retail', 'Logistics', 'Research']
df['project_type'] = pd.Categorical(df['project_type'], categories=all_possible_types)
le.fit(df['project_type'])


In [9]:
joblib.dump(le, "label_encoder.pkl")


['label_encoder.pkl']

In [10]:
label_map = dict(zip(le.classes_, le.transform(le.classes_)))
print(label_map)  # See the mapping


{'AI': np.int64(0), 'Logistics': np.int64(1), 'Research': np.int64(2), 'Retail': np.int64(3)}


In [14]:
# Ensure all categories are represented
all_types = ['AI', 'Retail', 'Logistics', 'Research']
df['project_type'] = pd.Categorical(df['project_type'], categories=all_types)

le = LabelEncoder()
le.fit(df['project_type'])
df['project_type_encoded'] = le.transform(df['project_type'])

# Save this encoder
joblib.dump(le, 'label_encoder.pkl')


['label_encoder.pkl']

In [15]:
# Load model
model = joblib.load("greenland_priority_model.pkl")

# Sample test input
test_input = pd.DataFrame([{
    'roi_score': 0.85,
    'urgency_score': 9,
    'gpu_hours_requested': 200,
    'team_size': 10,
    'project_type_encoded': le.transform(['AI'])[0]
}])

pred = model.predict(test_input)
priority_label = le.inverse_transform(pred)
print("Predicted Priority:", priority_label[0])


Predicted Priority: AI


In [16]:
def allocate_gpu(df, total_gpu_limit):
    df = df[df['priority'] == 'High']
    df = df.sort_values(by='roi_score', ascending=False)
    
    allocated = []
    used_gpu = 0

    for _, row in df.iterrows():
        if used_gpu + row['gpu_hours_requested'] <= total_gpu_limit:
            allocated.append(row['project_id'])
            used_gpu += row['gpu_hours_requested']
        else:
            break

    return allocated, used_gpu
