# In [None]:
import pandas as pd

# Source workbook to score and the destination for the scored copy.
input_file = 'C:/Users/faiyas.sheik/Documents/OSMR/Output/Final/compiled_file_01_28.xlsx'
output_file = 'C:/Users/faiyas.sheik/Documents/OSMR/compiled_file_01_28_output.xlsx'

# Revenue band: rows whose Revenue falls below min_revenue or above
# max_revenue are labelled 'No Score' when scores are scaled.
min_revenue, max_revenue = 100, 10000

# Read the workbook (pandas loads the first sheet by default).
df = pd.read_excel(input_file)

# Columns that must be present in the workbook before any scoring is attempted.
required_columns = [
    "ACCOUNT #", "Miles from Origin", "Haversine Distance",
    "Within Same LOB & Site - 5 Miles", "Within Same LOB & Site - 10 Miles",
    "Same FREQ Within Same LOB & Site - 5 Miles", "Same FREQ Within Same LOB & Site - 10 Miles",
    "LOB", "Site", "Revenue", "GALLONS", "FREQ"
]

# Fail fast and name the columns that are actually absent, so the user does
# not have to diff the full required list against the workbook by hand.
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(
        f"Excel file must contain the following columns: {required_columns}. "
        f"Missing: {missing_columns}"
    )

# Drop columns whose label starts with 'Unnamed'. na=False treats non-string
# labels as "no match" so they are kept instead of breaking the boolean mask.
df = df.loc[:, ~df.columns.str.contains(r'^Unnamed', na=False)]
df.reset_index(drop=True, inplace=True)

# Distance is the larger of the two distance measures. The row-wise max skips
# NaN, so a missing value in one column falls back to the other column and the
# result is NaN only when both are missing. Vectorised, so it also works on an
# empty frame and avoids a Python-level call per row.
df['Distance'] = df[['Miles from Origin', 'Haversine Distance']].max(axis=1)

# Dividing by a zero distance would yield +/-inf, which then distorts the
# quantiles and min/max used to scale every score in the same Site. Mask zero
# distances to NaN so only the affected rows end up without a usable ratio.
_nonzero_distance = df['Distance'].where(df['Distance'] != 0)

df['$perMile'] = df["Revenue"] / _nonzero_distance
# NOTE(review): despite its name, '$perGal' is GALLONS divided by Distance
# (gallons per mile) - confirm this is the intended formula.
df['$perGal'] = df["GALLONS"] / _nonzero_distance

# Weight applied to each feature when building the raw score. Built once at
# module level rather than on every row.
# 'FREQ' is deliberately not weighted at present. If it is re-enabled, a FREQ
# of 0 was previously treated as 52 before weighting.
_SCORE_WEIGHTS = {
    'Distance': -1,
    'Within Same LOB & Site - 5 Miles': 1.2,
    'Within Same LOB & Site - 10 Miles': 0.8,
    'Same FREQ Within Same LOB & Site - 5 Miles': 1.2,
    'Same FREQ Within Same LOB & Site - 10 Miles': 0.8,
    '$perMile': .5,
    '$perGal': .5,
}


def calculate_score(row):
    """Return the weighted sum of the scoring features found in *row*.

    Args:
        row: A mapping-like object exposing ``.get(key, default)`` (for
            example a pandas Series row or a dict) keyed by column name.

    Returns:
        The sum of ``weight * value`` over every weighted feature. A feature
        that is absent from ``row`` contributes NaN, which makes the whole
        score NaN.

    The row is only read, never modified.
    """
    return sum(
        weight * row.get(col, float('nan'))
        for col, weight in _SCORE_WEIGHTS.items()
    )

# Evaluate the weighted score for each row, then store it as a new column.
raw_scores = df.apply(calculate_score, axis=1)
df['Raw Score'] = raw_scores

def scale_scores(group, min_revenue, max_revenue):
    """Clip outlier raw scores and rescale them to a 0-100 'Scaled Score'.

    Args:
        group: DataFrame with at least 'Raw Score' and 'Revenue' columns.
        min_revenue: Rows with Revenue below this value get 'No Score'.
        max_revenue: Rows with Revenue above this value get 'No Score'.

    Returns:
        A new DataFrame with the same index as ``group`` in which
        'Raw Score' has been clipped to the 1.5 * IQR fences and a
        'Scaled Score' column (object dtype) holds either the rounded
        0-100 score or the string 'No Score'.

    The input frame is left untouched; all changes are made on a copy.
    """
    # Work on a copy so neither the caller's frame nor the object handed in
    # by groupby.apply is modified as a side effect.
    group = group.copy()

    q1 = group['Raw Score'].quantile(0.25)
    q3 = group['Raw Score'].quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # After this clip no value lies outside [lower_bound, upper_bound], so no
    # separate handling of out-of-fence scores is needed afterwards.
    group['Raw Score'] = group['Raw Score'].clip(lower=lower_bound, upper=upper_bound)
    min_score, max_score = group['Raw Score'].min(), group['Raw Score'].max()

    if max_score == min_score:
        # Every clipped score is identical, so min-max scaling would divide by
        # zero; fall back to a constant score for the whole group.
        group['Scaled Score'] = 100 if max_score > lower_bound else 0
    else:
        group['Scaled Score'] = (((group['Raw Score'] - min_score) / (max_score - min_score)) * 100).round()

    # Switch to object dtype so the column can hold the 'No Score' marker
    # alongside numeric scores.
    group['Scaled Score'] = group['Scaled Score'].astype('object')
    out_of_band = (group['Revenue'] < min_revenue) | (group['Revenue'] > max_revenue)
    group.loc[out_of_band, 'Scaled Score'] = 'No Score'

    return group

# Scale scores independently within each Site. The columns are selected
# explicitly (including 'Site') so the grouping column is part of every group
# passed to scale_scores and therefore stays in the output. Relying on pandas
# to include the grouping column implicitly is deprecated and emits a
# FutureWarning.
df = df.groupby('Site', group_keys=False)[list(df.columns)].apply(
    scale_scores, min_revenue=min_revenue, max_revenue=max_revenue
)
# group_keys=False keeps the original row labels; renumber them 0..n-1.
df.reset_index(drop=True, inplace=True)

# Add Category column based on Scaled Score
def assign_category(score):
    """Translate a scaled score into its category label.

    The marker "No Score" is passed through unchanged. Otherwise the score is
    compared against strict lower thresholds from highest to lowest, and any
    value that clears none of them is "Category E".
    """
    if score == "No Score":
        return "No Score"

    # (exclusive lower threshold, label), checked from the top band down.
    bands = (
        (90, "Category A"),
        (75, "Category B"),
        (50, "Category C"),
        (30, "Category D"),
    )
    for threshold, label in bands:
        if score > threshold:
            return label
    return "Category E"

# Label each row with the category that matches its scaled score.
df['Category'] = df['Scaled Score'].map(assign_category)

# Write the scored table to the output workbook without the index column,
# then report where it was saved.
df.to_excel(output_file, index=False)
print(f"Output has been successfully saved to: {output_file}")


# [captured cell output]
#   df = df.groupby('Site', group_keys=False).apply(scale_scores, min_revenue=min_revenue, max_revenue=max_revenue)
#
# Output has been successfully saved to: C:/Users/faiyas.sheik/Documents/OSMR/compiled_file_01_28_output.xlsx
