In [None]:
import os

import numpy as np

import pandas as pd
from pandas import Series, DataFrame

import matplotlib.pyplot as plt

In [None]:
# Build the `weights` table (one row per requirement) from the master workbook.
file = 'scores.xlsx'

# The first four columns of each sheet are read by position. `usecols` is given
# as an explicit list because a bare integer is no longer accepted by
# `read_excel`.
raw_product = pd.read_excel(file, sheet_name='Product', skiprows=2, usecols=[0, 1, 2, 3])
raw_product['page'] = 'product'
raw_company = pd.read_excel(file, sheet_name='Company', skiprows=2, usecols=[0, 1, 2, 3])
raw_company['page'] = 'company'

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
raw = pd.concat([raw_product, raw_company], ignore_index=True)
raw.columns = ['requirement', 'score', 'notes', 'weight', 'page']
raw = raw.drop(columns=['score', 'notes'])

# Every category block contains a row whose first cell is the literal text
# 'Requirement'; the row directly above it holds the category name.
# Walk the blocks from the last one to the first:
#   * take every row after the 'Requirement' row up to the end of `raw`,
#   * then cut `raw` back so that it ends just before the category-name row.
header_indexes = raw.index[raw['requirement'] == 'Requirement'].tolist()
header_indexes = list(reversed(header_indexes))

category_frames = []
for hi in header_indexes:
    # The index was reset above, so labels and positions coincide.
    category = raw.iloc[hi - 1, 0]
    sub_weights = raw.iloc[hi + 1:].copy()
    sub_weights['category'] = category
    # Rows without a requirement text are dropped.
    sub_weights = sub_weights[sub_weights['requirement'].notna()]

    category_frames.append(sub_weights)
    raw = raw.iloc[0:hi - 1]

if category_frames:
    # Concatenate once instead of growing a frame inside the loop.
    weights = pd.concat(category_frames, ignore_index=True)
else:
    # No category block found: keep the expected columns so later merges on
    # these names still work.
    weights = DataFrame(columns=['requirement', 'weight', 'page', 'category'])

weights

In [None]:
# Build the `scores` table: one row per (scorer, product, requirement) read
# from every '*-scores.xlsx' workbook in the current directory.
#
# Files are processed in sorted order so the row order is reproducible
# (`os.listdir` returns names in arbitrary order). Names starting with '~$' are
# skipped: Excel creates such temporary lock files while a workbook is open,
# and they cannot be parsed as workbooks.
files = sorted(
    f for f in os.listdir('.')
    if f.endswith('-scores.xlsx') and not f.startswith('~$')
)

score_frames = []
for file in files:
    # Scorer and product are taken from the second column of the first two
    # rows of the 'Product' sheet.
    raw = pd.read_excel(file, sheet_name='Product', usecols=[0, 1], header=None)
    scorer, product = raw.iloc[0, 1], raw.iloc[1, 1]

    # `usecols` is an explicit list of positions; a bare integer is no longer
    # accepted by `read_excel`.
    raw_product = pd.read_excel(file, sheet_name='Product', skiprows=2, usecols=[0, 1, 2])
    raw_product['page'] = 'product'
    raw_company = pd.read_excel(file, sheet_name='Company', skiprows=2, usecols=[0, 1, 2])
    raw_company['page'] = 'company'

    # `DataFrame.append` was removed in pandas 2.0; `pd.concat` replaces it.
    raw = pd.concat([raw_product, raw_company], ignore_index=True)
    raw.columns = ['requirement', 'score', 'notes', 'page']

    # Every category block contains a row whose first cell is the literal text
    # 'Requirement'; the row directly above it holds the category name.
    # Walk the blocks from the last one to the first:
    #   * take every row after the 'Requirement' row up to the end of `raw`,
    #   * then cut `raw` back so that it ends just before the category-name row.
    header_indexes = raw.index[raw['requirement'] == 'Requirement'].tolist()
    header_indexes = list(reversed(header_indexes))

    for hi in header_indexes:
        # The index was reset above, so labels and positions coincide.
        category = raw.iloc[hi - 1, 0]
        sub_scores = raw.iloc[hi + 1:].copy()
        sub_scores['category'] = category
        sub_scores['scorer'] = scorer
        sub_scores['product'] = product
        # Rows without a requirement text are dropped.
        sub_scores = sub_scores[sub_scores['requirement'].notna()]

        score_frames.append(sub_scores)
        raw = raw.iloc[0:hi - 1]

if score_frames:
    # Concatenate once instead of growing a frame inside the loops.
    scores = pd.concat(score_frames, ignore_index=True)
else:
    # No workbook (or no category block) found: keep the expected columns so
    # the following cells fail less obscurely than with a column-less frame.
    scores = DataFrame(
        columns=['requirement', 'score', 'notes', 'page', 'category', 'scorer', 'product']
    )

scores.head()


In [None]:
# Keep only the rows that carry a score and convert that column to a numeric
# dtype.
#
# The converted column is attached with `assign`, which returns a new frame.
# Writing through `cleansed.loc[:, 'score'] = ...` instead sets the values into
# the existing column in place on pandas 2.x, so an object-dtype column can
# stay object-dtype, and it also assigns into a frame derived from `scores`.
# `pd.to_numeric` raises if a score cannot be parsed as a number.
cleansed = scores.dropna(axis=0, subset=['score'])
cleansed = cleansed.assign(score=pd.to_numeric(cleansed['score']))
cleansed

In [None]:
# Aggregate the individual responses per (product, page, category, requirement):
# lowest and highest response (with their notes and scorer), mean score, number
# of responses, and the weighted mean contribution.
group_keys = ['product', 'page', 'category', 'requirement']
detail_columns = group_keys + ['score', 'notes', 'scorer']

# Stable sort, so responses with equal scores keep their existing row order.
by_score = cleansed.sort_values('score', kind='mergesort')

# Take the complete lowest / highest row of each group. `GroupBy.first()` and
# `.last()` return the first / last non-null value of each column separately,
# so a blank note on the extreme response would be replaced by another
# scorer's note. Selecting `detail_columns` also keeps any extra columns on
# `cleansed` out of the merges below.
minimums = (
    by_score.drop_duplicates(subset=group_keys, keep='first')[detail_columns]
    .sort_values(group_keys)
    .reset_index(drop=True)
)
maximums = (
    by_score.drop_duplicates(subset=group_keys, keep='last')[detail_columns]
    .sort_values(group_keys)
    .reset_index(drop=True)
)
means = cleansed.groupby(group_keys, as_index=False)['score'].mean()
# `size()` on a default (as_index=True) groupby returns a Series, which is what
# `reset_index(name=...)` requires.
counts = cleansed.groupby(group_keys).size().reset_index(name='responses')

minimums = minimums.rename(
    columns={'score': 'minimum score', 'notes': 'minimum notes', 'scorer': 'minimum scorer'}
)
maximums = maximums.rename(
    columns={'score': 'maximum score', 'notes': 'maximum notes', 'scorer': 'maximum scorer'}
)
means = means.rename(columns={'score': 'mean score'})

aggregate = pd.merge(minimums, maximums, on=group_keys)
aggregate = pd.merge(aggregate, means, on=group_keys)
aggregate = pd.merge(aggregate, counts, on=group_keys)
# Default inner merge: requirements with no matching row in `weights` are
# dropped from `aggregate`.
aggregate = pd.merge(aggregate, weights, on=['page', 'category', 'requirement'])
aggregate['mean contribution'] = aggregate['mean score'] * aggregate['weight']

aggregate

In [None]:
# Per product: sum of the weighted mean contributions and the number of
# contributions that went into that sum.
contribution_by_product = aggregate.groupby('product')['mean contribution']

summary_product = contribution_by_product.sum().to_frame(name='total weighted score')
summary_product['count'] = contribution_by_product.count()
summary_product

In [None]:
# Per scorer: mean of all scores given and the number of scores given.
score_by_scorer = cleansed.groupby('scorer')['score']

summary_scorer = score_by_scorer.mean().to_frame(name='average score')
summary_scorer['count'] = score_by_scorer.count()

summary_scorer

In [None]:
# Attach the per-requirement statistics (min / max / mean score and response
# count) to every individual response, then order the responses within each
# requirement from highest to lowest score.
requirement_keys = ['product', 'page', 'category', 'requirement']
score_by_requirement = cleansed.groupby(requirement_keys)['score']

cleansed = cleansed.assign(**{
    'min score': score_by_requirement.transform('min'),
    'max score': score_by_requirement.transform('max'),
    'mean score': score_by_requirement.transform('mean'),
    'responses': score_by_requirement.transform('count'),
})

cleansed = cleansed.sort_values(
    by=['product', 'page', 'category', 'requirement', 'score'],
    ascending=[True, True, True, True, False],
)

display_columns = [
    'product', 'page', 'category', 'requirement',
    'scorer', 'score', 'notes',
    'min score', 'mean score', 'max score', 'responses',
]
cleansed = cleansed[display_columns]
cleansed