In [1]:
import sys
import os
sys.path.append(os.path.abspath('../Backend'))
from wahlrecht_polling_firms import get_tables
from days_to_weeks import week
from pandas import DataFrame
import numpy as np
import pandas as pd
import datetime as dt

In [42]:
def average(data, model, weightvector=None):
    '''
    Average the polling results of all firms, week by week.

    For every week index the first entry of each firm whose week index (as
    returned by ``week``) equals that week is combined into one weighted mean
    per party.

    data: dictionary mapping a firm name to its polling table. Each table is
        passed to ``week`` and must provide the columns 'CDU/CSU', 'SPD',
        'GRÜNE', 'FDP', 'LINKE', 'AfD', 'Sonstige' and 'Befragte'.
    model: weighting scheme, one of
        'simple'             - every firm counts equally,
        'weightparticipants' - every entry is weighted by its 'Befragte' value,
        'weightfirms'        - every entry is weighted by weightvector[firm].
    weightvector: dictionary with a weight for every firm in data; only read
        (and required) by the 'weightfirms' model.
    return: DataFrame with one column per party holding the weighted average,
        'Befragte' holding the summed 'Befragte' values of the contributing
        entries and 'Datum' holding the Sunday assigned to the week. The row of
        week index 0 is dropped and the remaining rows are renumbered from 0.
        Weeks without any data (or whose weights sum to zero) contain NaN.
    raises: ValueError for an unknown model; TypeError if 'weightfirms' is
        requested without a weightvector.
    '''
    parties = ['CDU/CSU', 'SPD', 'GRÜNE', 'FDP', 'LINKE', 'AfD', 'Sonstige']
    models = ('simple', 'weightparticipants', 'weightfirms')
    if model not in models:
        # Previously an unknown model silently produced a frame full of zeros.
        raise ValueError("unknown model %r, expected one of: %s"
                         % (model, ', '.join(models)))
    if model == 'weightfirms' and weightvector is None:
        raise TypeError("model 'weightfirms' needs a weightvector with a "
                        "weight for every firm")

    week_ind = {}
    n_weeks = 0
    for key in data:
        # Plain ndarray: makes the value comparison below independent of
        # whether ``week`` returns a list, an array or a pandas Series.
        wk = np.asarray(week(data[key]))
        week_ind[key] = wk
        if wk.size:
            n_weeks = max(n_weeks, int(wk.max()))

    # NOTE(review): rows cover the week indices 0 .. n_weeks-1 only, so entries
    # whose week index equals the maximum are never averaged -- confirm whether
    # leaving out that week is intended.
    result = np.zeros((n_weeks, len(parties)))
    total_part = np.zeros(n_weeks)

    for i in range(n_weeks):
        weight_sum = 0
        for key, wk in week_ind.items():
            hits = np.flatnonzero(wk == i)
            if hits.size == 0:
                continue
            # Only the first entry of a firm within a week is used.
            current_ind = hits[0]
            n_part = data[key]['Befragte'][current_ind]
            total_part[i] += n_part

            if model == 'simple':
                weight = 1
            elif model == 'weightparticipants':
                weight = n_part
            else:
                weight = weightvector[key]

            for j, p in enumerate(parties):
                result[i, j] += data[key][p][current_ind] * weight
            weight_sum += weight

        if weight_sum == 0:
            # No data for this week: mark it as missing instead of dividing
            # by zero (which emitted a RuntimeWarning).
            result[i, :] = np.nan
        else:
            result[i, :] /= weight_sum

    res = pd.DataFrame.from_dict(
        {p: result[:, j] for j, p in enumerate(parties)})

    # weekday() is 0 for Monday and 6 for Sunday, so this is the Sunday of the
    # current week (today itself when today is a Sunday).
    today_date = dt.date.today()
    next_sunday = today_date + dt.timedelta(days=6 - today_date.weekday())
    sundays = np.zeros(n_weeks, dtype='datetime64[ms]')
    for i in range(n_weeks):
        sundays[i] = np.datetime64(next_sunday - dt.timedelta(days=7 * i))

    res['Befragte'] = total_part
    res['Datum'] = sundays
    # Drop week 0 and renumber from 0; positional slicing also works when the
    # frame is empty, where drop(0) would raise a KeyError.
    return res.iloc[1:].reset_index(drop=True)

    

In [44]:
# testing: fetch the polling tables that serve as input for average()
data = get_tables()

In [39]:
# Weight per polling firm, passed to average() as the weightvector.
w = dict(
    allensbach=0.2,
    emnid=0.1,
    forsa=0.1,
    politbarometer=0.1,
    gms=0.2,
    dimap=0.1,
    insa=0.1,
)

In [43]:
# Weekly averages with every firm weighted by its entry in w.
res = average(data, model='weightfirms', weightvector=w)
print(res)

      AfD  CDU/CSU  FDP  GRÜNE  LINKE  SPD  Sonstige  Befragte      Datum
0     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-07-09
1     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-07-02
2     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-06-25
3     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-06-18
4     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-06-11
5     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-06-04
6     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-05-28
7     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-05-21
8     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-05-14
9     NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-05-07
10    NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-04-30
11    NaN      NaN  NaN    NaN    NaN  NaN       NaN       0.0 2017-04-23
12    NaN      NaN  NaN    NaN    NaN 