In [1]:
import pandas as pd
import numpy as np
import os.path

In [2]:
# Folder holding the per-season pair tables read below.
DATAFOLDER = './datasets/results'

# The bootstrap further down draws from NumPy's global random state; seeding it
# once here makes a fresh "Restart & Run All" reproduce the same intervals.
SEED = 42
np.random.seed(SEED)

In [3]:
# Load the four per-season pair tables from DATAFOLDER, in the same order as before
# (summer, winter, autumn, spring).
summer_pairs, winter_pairs, autumn_pairs, spring_pairs = (
    pd.read_csv(os.path.join(DATAFOLDER, filename))
    for filename in ('summer_pairs.csv', 'winter_pairs.csv', 'autumn_pairs.csv', 'spring_pairs.csv')
)

In [4]:
# Report how many rows (pairs) each season's table contains.
pair_counts = (
    ('Number of summer pairs: {}', summer_pairs),
    ('Number of winter pairs: {}', winter_pairs),
    ('Number of autumn pairs: {}', autumn_pairs),
    ('Number of spring pairs: {}', spring_pairs),
)
for template, pairs in pair_counts:
    print(template.format(len(pairs)))

Number of summer pairs: 14375
Number of winter pairs: 14757
Number of autumn pairs: 13591
Number of spring pairs: 19234


In [5]:
# Preview the first rows of the summer pairs table to inspect the loaded columns.
summer_pairs.head()

Unnamed: 0,Beer Id1,Brewery Id1,Style,Abv,Date1,User Id,Appearance1,Aroma1,Palate1,Taste1,...,Palate Positive2,Palate Neutral2,Palate Negative2,Taste Positive2,Taste Neutral2,Taste Negative2,Month1,Month2,Text Length1,Text Length2
0,4720,323,English Pale Ale,4.2,2011-07-31 12:00:00,flyingpig.524986,4.0,3.5,4.0,4.0,...,0.600418,0.066736,0.332846,0.583243,0.046122,0.370635,7,8,813,720
1,1166,71,English Pale Ale,5.0,2012-09-14 12:00:00,bark.4217,4.0,3.5,3.0,4.0,...,0.642493,0.272138,0.085369,0.942808,0.028565,0.028627,9,9,726,774
2,99005,23394,Euro Pale Lager,5.0,2013-09-06 12:00:00,blackhaddock.90923,3.25,3.25,3.25,3.5,...,0.045273,0.086728,0.868,0.305613,0.220263,0.474125,9,7,759,669
3,99005,23394,Euro Pale Lager,5.0,2013-09-06 12:00:00,blackhaddock.90923,3.25,3.25,3.25,3.5,...,0.470252,0.159512,0.370236,0.025083,0.032215,0.942701,9,8,759,760
4,99123,8412,English Bitter,5.0,2013-09-08 12:00:00,blackhaddock.90923,3.5,3.25,3.25,3.5,...,0.038635,0.045161,0.916204,0.01004,0.005884,0.984076,9,8,415,364


In [6]:
# Sentiment columns to threshold: for every aspect, the Positive/Negative/Neutral
# column with suffix 1 followed by the same three columns with suffix 2.
features = [
    '{} {}{}'.format(aspect, polarity, suffix)
    for aspect in ('Appearance', 'Aroma', 'Palate', 'Taste')
    for suffix in ('1', '2')
    for polarity in ('Positive', 'Negative', 'Neutral')
]

In [7]:
# Cut-off above which a sentiment score is counted as "present".
thresh = 0.9

# Add a 0/1 '<feature>_ind' column to each season's table: 1 where the score
# exceeds the threshold, 0 otherwise.
for pairs in (summer_pairs, winter_pairs, autumn_pairs, spring_pairs):
    for feature in features:
        exceeds = pairs[feature] > thresh
        pairs[feature + '_ind'] = 1 * exceeds

In [8]:
# Aspect/polarity combinations compared between the two members of a pair.
effects = [
    'Appearance Positive', 'Appearance Negative', 'Appearance Neutral',
    'Aroma Positive', 'Aroma Negative', 'Aroma Neutral',
    'Palate Positive', 'Palate Negative', 'Palate Neutral',
    'Taste Positive', 'Taste Negative', 'Taste Neutral',
]

# One list per season; each entry is the per-row gap (indicator 1 minus
# indicator 2) for the effect at the same position in `effects`.
summer_effects, winter_effects, autumn_effects, spring_effects = [], [], [], []
season_buckets = (
    (summer_pairs, summer_effects),
    (winter_pairs, winter_effects),
    (autumn_pairs, autumn_effects),
    (spring_pairs, spring_effects),
)
for effect in effects:
    first_col = effect + '1_ind'
    second_col = effect + '2_ind'
    for pairs, bucket in season_buckets:
        bucket.append(pairs[first_col] - pairs[second_col])

In [9]:
# Turn each season's list of gap Series into a DataFrame with one column per effect.
# The right-hand tuple is built before any name is rebound, so every frame is
# constructed from the original list.
summer_effects, winter_effects, autumn_effects, spring_effects = (
    pd.DataFrame(dict(zip(effects, per_effect_gaps)))
    for per_effect_gaps in (summer_effects, winter_effects, autumn_effects, spring_effects)
)

In [10]:
# Preview the per-effect gaps (indicator 1 minus indicator 2) for the summer pairs.
summer_effects.head()

Unnamed: 0,Appearance Positive,Appearance Negative,Appearance Neutral,Aroma Positive,Aroma Negative,Aroma Neutral,Palate Positive,Palate Negative,Palate Neutral,Taste Positive,Taste Negative,Taste Neutral
0,1,0,0,0,0,0,1,0,0,1,0,0
1,1,0,0,0,0,0,1,0,0,0,0,0
2,0,0,0,-1,0,0,0,0,0,0,0,0
3,0,0,0,0,-1,0,0,0,0,0,-1,0
4,1,0,0,1,-1,0,0,-1,0,1,-1,0


In [11]:
# Preview the per-effect gaps (indicator 1 minus indicator 2) for the winter pairs.
winter_effects.head()

Unnamed: 0,Appearance Positive,Appearance Negative,Appearance Neutral,Aroma Positive,Aroma Negative,Aroma Neutral,Palate Positive,Palate Negative,Palate Neutral,Taste Positive,Taste Negative,Taste Neutral
0,1,-1,0,1,0,0,0,-1,0,0,-1,0
1,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,-1,0,0
4,1,0,0,0,0,0,1,0,0,0,0,0


In [12]:
def sidak(k, alpha=0.05):
    """Return the Sidak-corrected per-test significance level.

    Parameters
    ----------
    k : number of tests performed together.
    alpha : desired overall (family-wise) significance level.

    Returns
    -------
    The per-test level ``1 - (1 - alpha) ** (1 / k)``.
    """
    per_test_confidence = (1 - alpha) ** (1/k)
    return 1 - per_test_confidence

In [13]:
# Per-test significance level after correcting for one test per effect.
# NOTE(review): the correction counts only len(effects) tests, while the same
# level is reused for several season comparisons below -- confirm this is intended.
n_tests = len(effects)
significance_level = sidak(k=n_tests, alpha=0.05)
# Matching confidence level, expressed in percent.
gamma = 100 * (1 - significance_level)

print('Adjusted significance level: {:.4f}'.format(significance_level))
print('Confidence interval of {:.4f}% needs to be calculated'.format(gamma))

Adjusted significance level: 0.0043
Confidence interval of 99.5735% needs to be calculated


In [14]:
def effect_diff(population1, population2, gamma, nboot=1000, seed=None):
    """Bootstrap the difference in means between two samples.

    Each sample is resampled with replacement (at its own size) ``nboot`` times;
    the statistic is ``mean(resample of population1) - mean(resample of population2)``.

    Parameters
    ----------
    population1, population2 : 1-D array-likes of numbers (e.g. a pandas Series).
    gamma : confidence level in percent (e.g. 95 for a 95% interval).
    nboot : number of bootstrap replicates. With very high confidence levels only
        a handful of replicates fall in each tail, so consider a larger value.
    seed : optional seed (or ``numpy.random.Generator``) for reproducible draws.
        When omitted, NumPy's global random state is used, so a prior
        ``np.random.seed(...)`` call still controls the result.

    Returns
    -------
    (effect_avg, effect_low, effect_high) : mean of the bootstrap differences and
    the lower/upper percentile bounds of the ``gamma``% interval.

    Raises
    ------
    ValueError : if either sample is empty (the result would otherwise be NaN).
    """
    sample1 = np.asarray(population1)
    sample2 = np.asarray(population2)
    if sample1.size == 0 or sample2.size == 0:
        raise ValueError('effect_diff requires two non-empty populations')

    # Fall back to the legacy global state so existing callers behave as before.
    rng = np.random if seed is None else np.random.default_rng(seed)

    n1 = len(sample1); n2 = len(sample2)
    # One row per bootstrap replicate; population1 is drawn first, then population2.
    boot_means1 = np.mean(rng.choice(sample1, size=(nboot, n1)), axis=1)
    boot_means2 = np.mean(rng.choice(sample2, size=(nboot, n2)), axis=1)
    mean_diff = boot_means1 - boot_means2

    effect_avg = np.mean(mean_diff)
    # Split the excluded probability mass equally between the two tails.
    tail = (100 - gamma) / 2
    [effect_low, effect_high] = np.percentile(mean_diff, [tail, 100 - tail])
    return effect_avg, effect_low, effect_high

In [15]:
# Bootstrap results per effect for winter minus summer.
effect_avg = {}
effect_low = {}
effect_high = {}

for effect in effects:
    boot_avg, ci_low, ci_high = effect_diff(winter_effects[effect], summer_effects[effect], gamma)
    effect_avg[effect] = boot_avg
    effect_low[effect] = ci_low
    effect_high[effect] = ci_high

In [16]:
# An effect is significant only when its whole confidence interval lies strictly on
# one side of zero; an interval that merely touches zero does not exclude "no change"
# (and a degenerate all-zero interval would otherwise land in both lists).
# NOTE: the variable names are the reverse of the messages. `gap_smaller` collects
# effects whose lower bound is above zero (gap bigger in the first season) and
# `gap_bigger` those whose upper bound is below zero; the printed text is the
# correct reading.
gap_smaller = [k for k, v in effect_low.items() if v > 0]
gap_bigger = [k for k, v in effect_high.items() if v < 0]
print('The gap was bigger during winter than summer for : {}'.format(gap_smaller))
print('The gap was smaller during winter than summer for : {}'.format(gap_bigger))

The gap was bigger during winter than summer for : []
The gap was smaller during winter than summer for : []


Checking for the following changes in sentiment (each comparison is the first season minus the second):
- Autumn - Winter
- Winter - Spring
- Spring - Summer
- Summer - Autumn

### Autumn - Winter

In [17]:
# Bootstrap results per effect for autumn minus winter.
effect_avg = {}
effect_low = {}
effect_high = {}

for effect in effects:
    boot_avg, ci_low, ci_high = effect_diff(autumn_effects[effect], winter_effects[effect], gamma)
    effect_avg[effect] = boot_avg
    effect_low[effect] = ci_low
    effect_high[effect] = ci_high

In [18]:
# An effect is significant only when its whole confidence interval lies strictly on
# one side of zero; an interval that merely touches zero does not exclude "no change"
# (and a degenerate all-zero interval would otherwise land in both lists).
# NOTE: the variable names are the reverse of the messages. `gap_smaller` collects
# effects whose lower bound is above zero (gap bigger in the first season) and
# `gap_bigger` those whose upper bound is below zero; the printed text is the
# correct reading.
gap_smaller = [k for k, v in effect_low.items() if v > 0]
gap_bigger = [k for k, v in effect_high.items() if v < 0]
print('The gap was bigger during autumn than winter for : {}'.format(gap_smaller))
print('The gap was smaller during autumn than winter for : {}'.format(gap_bigger))

The gap was bigger during autumn than winter for : []
The gap was smaller during autumn than winter for : ['Appearance Negative', 'Palate Negative', 'Taste Negative']


### Winter - Spring

In [19]:
# Bootstrap results per effect for winter minus spring.
effect_avg = {}
effect_low = {}
effect_high = {}

for effect in effects:
    boot_avg, ci_low, ci_high = effect_diff(winter_effects[effect], spring_effects[effect], gamma)
    effect_avg[effect] = boot_avg
    effect_low[effect] = ci_low
    effect_high[effect] = ci_high

In [20]:
# An effect is significant only when its whole confidence interval lies strictly on
# one side of zero; an interval that merely touches zero does not exclude "no change"
# (and a degenerate all-zero interval would otherwise land in both lists).
# NOTE: the variable names are the reverse of the messages. `gap_smaller` collects
# effects whose lower bound is above zero (gap bigger in the first season) and
# `gap_bigger` those whose upper bound is below zero; the printed text is the
# correct reading.
gap_smaller = [k for k, v in effect_low.items() if v > 0]
gap_bigger = [k for k, v in effect_high.items() if v < 0]
print('The gap was bigger during winter than spring for : {}'.format(gap_smaller))
print('The gap was smaller during winter than spring for : {}'.format(gap_bigger))

The gap was bigger during winter than spring for : ['Palate Neutral']
The gap was smaller during winter than spring for : ['Appearance Negative', 'Aroma Negative', 'Palate Negative']


### Spring - Summer

In [21]:
# Bootstrap results per effect for spring minus summer.
effect_avg = {}
effect_low = {}
effect_high = {}

for effect in effects:
    boot_avg, ci_low, ci_high = effect_diff(spring_effects[effect], summer_effects[effect], gamma)
    effect_avg[effect] = boot_avg
    effect_low[effect] = ci_low
    effect_high[effect] = ci_high

In [22]:
# An effect is significant only when its whole confidence interval lies strictly on
# one side of zero; an interval that merely touches zero does not exclude "no change"
# (and a degenerate all-zero interval would otherwise land in both lists).
# NOTE: the variable names are the reverse of the messages. `gap_smaller` collects
# effects whose lower bound is above zero (gap bigger in the first season) and
# `gap_bigger` those whose upper bound is below zero; the printed text is the
# correct reading.
gap_smaller = [k for k, v in effect_low.items() if v > 0]
gap_bigger = [k for k, v in effect_high.items() if v < 0]
print('The gap was bigger during spring than summer for : {}'.format(gap_smaller))
print('The gap was smaller during spring than summer for : {}'.format(gap_bigger))

The gap was bigger during spring than summer for : ['Aroma Negative', 'Palate Negative', 'Taste Negative']
The gap was smaller during spring than summer for : ['Appearance Neutral', 'Palate Neutral']


### Summer - Autumn

In [23]:
# Bootstrap results per effect for summer minus autumn.
effect_avg = {}
effect_low = {}
effect_high = {}

for effect in effects:
    boot_avg, ci_low, ci_high = effect_diff(summer_effects[effect], autumn_effects[effect], gamma)
    effect_avg[effect] = boot_avg
    effect_low[effect] = ci_low
    effect_high[effect] = ci_high

In [24]:
# An effect is significant only when its whole confidence interval lies strictly on
# one side of zero; an interval that merely touches zero does not exclude "no change"
# (and a degenerate all-zero interval would otherwise land in both lists).
# NOTE: the variable names are the reverse of the messages. `gap_smaller` collects
# effects whose lower bound is above zero (gap bigger in the first season) and
# `gap_bigger` those whose upper bound is below zero; the printed text is the
# correct reading.
gap_smaller = [k for k, v in effect_low.items() if v > 0]
gap_bigger = [k for k, v in effect_high.items() if v < 0]
print('The gap was bigger during summer than autumn for : {}'.format(gap_smaller))
print('The gap was smaller during summer than autumn for : {}'.format(gap_bigger))

The gap was bigger during summer than autumn for : ['Aroma Neutral', 'Palate Negative', 'Taste Negative']
The gap was smaller during summer than autumn for : ['Taste Positive']
