In [40]:
import numpy as np
import pandas as pd
from IPython.core.display import HTML

def pretty(dataframe):
    """Display ``dataframe`` with its 'funding' column shown as euro amounts.

    The 'funding' column is formatted with thousands separators and a
    trailing euro sign; other columns keep their default formatting.
    Relies on the ``display`` function provided by the notebook environment.
    """
    column_formats = {'funding': '{:,} €'}
    styled = dataframe.style.format(column_formats)
    display(styled)

# Reporting year analysed in this cell.
REPORT_YEAR = 2018

raw_data = pd.read_csv('1.C.1.csv', delimiter=';')
# Keep only the reporting year. `columns=` is used because the positional
# axis argument of DataFrame.drop was removed in pandas 2.0.
data = raw_data[raw_data.year == REPORT_YEAR].drop(columns='year')

# One (initially empty) list of generated statements per university.
unis = {uni: [] for uni in data.university.unique()}

# Funding per university (rows) and funder (columns); missing pairs become 0.
# The string 'sum' avoids the FutureWarning newer pandas raises for np.sum.
table = pd.pivot_table(data, values='funding', index=['university'],
                       columns=['funder'], fill_value=0, aggfunc='sum')

totals_abs = table.sum()                 # total funding per funder
totals_avg = table.mean()                # mean funding per funder across universities
totals = totals_abs / totals_abs.sum()   # each funder's share of all funding

sums = table.sum(axis="columns")         # total funding per university
sums_avg = table.mean(axis="columns")    # mean funding per funder within one university

# Bitmask per (university, funder) cell; the three flags are added up:
#   1: university-internal - amount is above the university's own mean
#                            across funders
#   2: comparative/relative - the funder's share of this university's funding
#                            exceeds the funder's share of all funding
#   4: systemic            - amount is above the funder's mean across
#                            universities
interesting = (
    1 * table.gt(sums_avg, axis="rows")
    + 2 * table.div(sums, axis="rows").gt(totals)
    + 4 * table.gt(totals_avg, axis="columns")
)

# Sentence templates indexed by the relevance bitmask stored in `interesting`
# (0 = no statement). '$uni' and '$funder' are placeholders filled in per cell.
# Defined once instead of being rebuilt for every table cell.
switcher = [
    '',
    '$funder ist zwar uniintern wichtig, aber andere Universitäten setzen stärker auf $funder. Für $funder ist die Universität nicht relevant.',
    'Die $uni ist von $funder stärker abhängig als andere Universitäten, auch wenn $funder uniintern keine hohe Bedeutung hat. Auch für $funder ist die Universität nicht relevant.',
    '$funder ist sowohl innerhalb der Universität als auch im relativen Vergleich mit anderen eine wichtige Geldquelle. Für $funder allerdings ist die $uni nicht relevant.',
    'Für $funder ist die Universität bedeutsam, aber innerhalb und im relativen Vergleich mit anderen Universitäten ist $funder nicht relevant.',
    'Obwohl die Universität bei $funder eine erfolgreiche Fördernehmerin ist, konzentrieren andere Universitäten stärker auf $funder. Dennoch ist $funder einer der großen Förderer .',
    'Die Universität ist erfolgreich bei $funder und auch für $funder ist die $uni relevant. Nur uniintern hat $funder keine große Bedeutung.',
    '$funder ist ein wichtiger Fördergeber, sowohl intern als auch im relativen Vergleich mit anderen Universitäten. Und auch für $funder ist die $uni eine wichtige Partnerin.'
]


def _render_statement(template, uni, funder):
    """Fill one sentence template and return it as an HTML fragment.

    ``$uni`` is replaced by the university name and ``$funder`` by the funder
    name wrapped in ``<strong>``. The sentence's first letter is capitalised
    *before* the markup is added; capitalising afterwards would only touch
    the ``<`` of the opening tag and leave lowercase funder names unchanged.
    """
    text = template.replace('$uni', uni)
    if text.startswith('$funder'):
        # Sentence opens with the funder: capitalise that first mention only.
        opener = funder[0:1].capitalize() + funder[1:]
        text = text.replace('$funder', '<strong>' + opener + '</strong>', 1)
    else:
        text = text[0:1].capitalize() + text[1:]
    return text.replace('$funder', '<strong>' + funder + '</strong>')


# Give the two catch-all funder categories names that read well in a sentence.
interesting = interesting.rename(index=str, columns={'sonstige': 'sonstige Fördergeber', 'nicht bekannt / nicht zuordenbar': 'nicht zuordenbare Fördergeber'})
headers = interesting.columns
for row in interesting.itertuples():
    # First tuple field is the index (university); the rest follow column order.
    (uni, *values) = row
    for funder, element in zip(headers, values):
        template = switcher[element]
        if template == '':
            continue
        # Stored as (bitmask, rendered sentence, funder name).
        unis[uni].append((element, _render_statement(template, uni, funder), funder))
            
# Render one heading plus one paragraph of statements per university.
for uni, statements in unis.items():
    if not statements:
        continue

    # Highest bitmask first; the sort is stable, so funders with the same
    # bitmask keep their column order.
    ranked = sorted(statements, key=lambda tup: tup[0], reverse=True)

    # One output line per distinct bitmask: the first funder gets the full
    # sentence, further funders with the same bitmask a short follow-up.
    lines = []
    old_importance = None
    for importance, sentence, funder in ranked:
        if importance == old_importance:
            lines[-1] += ' Dies lässt sich ebenso über <strong>' + funder + '</strong> sagen.'
        else:
            lines.append(sentence)
        old_importance = importance

    # Emit heading and paragraph as ONE HTML fragment. Each display() call is
    # rendered on its own, so a <p> opened in one call and closed in another
    # never wraps the text and its font style is lost.
    display(HTML('<h3>' + uni + '</h3>'
                 + '<p style="font-family:verdana">' + '<br/>'.join(lines) + '</p>'))
        
        
# Append each university's total across all funders as a 'Summe' column.
table['Summe'] = table.sum(axis="columns")

# Show every cell as a thousands-separated euro amount.
euro_styled = table.style.format('{:,} €')
display(euro_styled)

# Print the overall total for one university as plain text.
vienna_total = table.loc['Universität Wien', 'Summe']
print(f'{vienna_total:,} €')

funder,Bund (Ministerien),EU,FFG,FWF,Gemeinden und Gemeindeverbände (ohne Wien),Länder (inkl. deren Stiftungen und Einrichtungen),Unternehmen,andere internationale Organisationen,nicht bekannt / nicht zuordenbar,sonstige,Summe
university,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Akademie der bildenden Künste Wien,"64,032 €","2,500 €",0 €,"724,207 €",898 €,"14,385 €","15,273 €",0 €,0 €,"197,870 €","1,019,165 €"
Medizinische Universität Graz,"727,683 €","2,149,800 €","957,441 €","4,400,452 €","40,834 €","4,585,606 €","32,440,938 €","145,251 €",0 €,"1,985,663 €","47,433,668 €"
Medizinische Universität Innsbruck,"3,692,310 €","3,169,485 €","617,599 €","8,230,127 €",0 €,"487,287 €","21,297,441 €",0 €,0 €,"365,223 €","37,859,472 €"
Medizinische Universität Wien,"1,672,802 €","8,347,843 €","570,863 €","18,825,925 €","80,000 €","2,015,327 €","21,467,168 €","173,461 €",0 €,"33,211,761 €","86,365,150 €"
Montanuniversität Leoben,"3,154,354 €","3,460,401 €","8,312,233 €","1,241,007 €",0 €,"5,337,531 €","14,284,451 €",0 €,0 €,0 €,"35,789,977 €"
Technische Universität Graz,"1,270,607 €","11,016,518 €","18,213,261 €","6,572,226 €","300,201 €","1,304,889 €","24,719,792 €",962 €,0 €,"1,502,675 €","64,901,131 €"
Technische Universität Wien,"2,190,866 €","14,732,401 €","13,392,860 €","19,662,614 €","394,796 €","2,886,762 €","22,101,493 €","862,147 €",0 €,"1,262,398 €","77,486,337 €"
Universität Graz,"828,649 €","2,907,788 €","967,722 €","12,896,939 €","685,430 €","2,123,043 €","1,721,040 €","252,022 €",0 €,"865,364 €","23,247,997 €"
Universität Innsbruck,"825,619 €","8,544,360 €","3,637,254 €","16,317,638 €","184,004 €","5,495,536 €","4,795,888 €","168,906 €",0 €,"20,815 €","39,990,020 €"
Universität Klagenfurt,"960,387 €","1,024,247 €","694,327 €","1,378,473 €","15,000 €","584,712 €","1,606,003 €","9,804 €",0 €,"120,901 €","6,393,854 €"


74,550,657 €


In [104]:
class University:
    """Placeholder class; it defines no attributes or behaviour yet."""


class Dimension:
    """A named column of a dataframe together with its distinct values."""

    @classmethod
    def create(cls, dataframe, dimensions):
        """Build one instance per column name in ``dimensions``.

        Parameters:
            dataframe: object supporting ``name in dataframe`` and
                ``dataframe[name].unique()`` (e.g. a pandas DataFrame).
            dimensions: iterable of column names.

        Returns:
            list of instances of ``cls`` in the order of ``dimensions``.

        Raises:
            AssertionError: if a requested column is not in ``dataframe``.
        """
        dims = []
        for dim in dimensions:
            assert dim in dataframe, "There is no column '" + dim + "'"
            # Instantiate via `cls` so subclasses get their own type back.
            dims.append(cls(dim, dataframe[dim].unique()))
        return dims

    def __init__(self, name, categories):
        """Store the dimension's column name and its distinct categories."""
        self.__name = name
        self.__categories = categories

    @property
    def name(self):
        """Column name this dimension was created from."""
        return self.__name

    @property
    def categories(self):
        """Distinct values of the column, as passed to the constructor."""
        return self.__categories

    def __repr__(self):
        return 'Dimension(' + repr(self.__name) + ', ' + repr(list(self.__categories)) + ')'


class Structure:
    """Per-university funding shares along one structuring column.

    On construction the values of ``datasheet.current_dataframe`` are pivoted
    (universities as rows, categories of ``structure_column`` as columns),
    each row is divided by its row total, and the resulting share table is
    displayed as percentages.
    """

    def __init__(self, datasheet, structure_column):
        """Compute and display the share table.

        Parameters:
            datasheet: object exposing ``current_dataframe``,
                ``value_column_name`` and ``university_column_name``.
            structure_column: name of the column whose categories become the
                columns of the share table.
        """
        # Remember the column so `structure_name` can report it.
        self.__structure_column = structure_column
        # 'sum' instead of np.sum avoids the FutureWarning of newer pandas.
        self.__data = pd.pivot_table(datasheet.current_dataframe,
                                     values=datasheet.value_column_name,
                                     index=[datasheet.university_column_name],
                                     columns=[structure_column],
                                     fill_value=0, aggfunc='sum')
        self.__sum = self.__data.sum(axis='columns')    # row totals
        self.__avg = self.__data.mean(axis='columns')   # row means

        # Share of each category in the row total of its university.
        self.__structure = self.__data.div(self.__sum, axis="rows")
        display(self.__structure.style.format('{0:.1%}'))

    @property
    def structure_name(self):
        """Name of the column this structure was built from."""
        return self.__structure_column
    
    

class Profile:
    """Placeholder class; it defines no attributes or behaviour yet."""

class Relevance:
    """Placeholder class; it defines no attributes or behaviour yet."""



# Column-name settings for the funding data. The values match the literals
# handed to the Datasheet constructor further down in this cell.
university_name = "university"
timepoint_name = "year"
dimensions = ["funder", "fos"]


class Datasheet:
    """A long-format table of values per university, time point and dimension.

    The constructor validates the column names, records the distinct time
    points and universities, extracts the rows of the latest time point as
    the "current" dataframe, and builds a ``Structure`` for it.
    """

    @property
    def value_column_name(self):
        """Name of the column holding the numeric values."""
        return self.__value_column

    @property
    def university_column_name(self):
        """Name of the column identifying the university."""
        return self.__university_column

    @property
    def current_dataframe(self):
        """Rows of the latest time point, without the time-point column."""
        return self.__current_dataframe

    def __init__(self, dataframe, dimensions, timepoint_name, university_name, value_name):
        """Validate the columns and prepare the latest-time-point view.

        Parameters:
            dataframe: source table (used like a pandas DataFrame).
            dimensions: column names to turn into ``Dimension`` objects.
            timepoint_name: column holding the time point.
            university_name: column holding the university.
            value_name: column holding the values.

        Raises:
            AssertionError: if one of the three named columns is missing
                (or, via ``Dimension.create``, a dimension column).
        """
        assert timepoint_name in dataframe, "There is no column '" + timepoint_name + "'"
        assert university_name in dataframe, "There is no column '" + university_name + "'"
        assert value_name in dataframe, "There is no column with valid data called '" + value_name + "'"

        self.__dataframe = dataframe
        self.__timepoint_column = timepoint_name
        self.__university_column = university_name
        self.__value_column = value_name

        self.__dimensions = Dimension.create(dataframe, dimensions)
        self.__timepoints = dataframe[timepoint_name].unique()
        self.__universities = dataframe[university_name].unique()

        # The "current" view is the most recent time point. `columns=` is
        # required: DataFrame.drop no longer accepts a positional axis in
        # pandas 2.0+.
        self.__current_timepoint = dataframe[timepoint_name].max()
        is_current = dataframe[timepoint_name] == self.__current_timepoint
        self.__current_dataframe = dataframe[is_current].drop(columns=timepoint_name)

        # NOTE(review): the structuring column is fixed to 'funder' rather than
        # derived from `dimensions` - confirm whether that is intended.
        self.__structure = Structure(self, 'funder')
        
        
        
# Build the datasheet from the raw funding data loaded in the previous cell,
# using the column-name settings defined above instead of repeating literals.
funding_sheet = Datasheet(
    raw_data,
    dimensions=dimensions,
    timepoint_name=timepoint_name,
    university_name=university_name,
    value_name='funding',
)


















funder,Bund (Ministerien),EU,FFG,FWF,Gemeinden und Gemeindeverbände (ohne Wien),Länder (inkl. deren Stiftungen und Einrichtungen),Unternehmen,andere internationale Organisationen,nicht bekannt / nicht zuordenbar,sonstige
university,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Akademie der bildenden Künste Wien,6.3%,0.2%,0.0%,71.1%,0.1%,1.4%,1.5%,0.0%,0.0%,19.4%
Medizinische Universität Graz,1.5%,4.5%,2.0%,9.3%,0.1%,9.7%,68.4%,0.3%,0.0%,4.2%
Medizinische Universität Innsbruck,9.8%,8.4%,1.6%,21.7%,0.0%,1.3%,56.3%,0.0%,0.0%,1.0%
Medizinische Universität Wien,1.9%,9.7%,0.7%,21.8%,0.1%,2.3%,24.9%,0.2%,0.0%,38.5%
Montanuniversität Leoben,8.8%,9.7%,23.2%,3.5%,0.0%,14.9%,39.9%,0.0%,0.0%,0.0%
Technische Universität Graz,2.0%,17.0%,28.1%,10.1%,0.5%,2.0%,38.1%,0.0%,0.0%,2.3%
Technische Universität Wien,2.8%,19.0%,17.3%,25.4%,0.5%,3.7%,28.5%,1.1%,0.0%,1.6%
Universität Graz,3.6%,12.5%,4.2%,55.5%,2.9%,9.1%,7.4%,1.1%,0.0%,3.7%
Universität Innsbruck,2.1%,21.4%,9.1%,40.8%,0.5%,13.7%,12.0%,0.4%,0.0%,0.1%
Universität Klagenfurt,15.0%,16.0%,10.9%,21.6%,0.2%,9.1%,25.1%,0.2%,0.0%,1.9%
