In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
from constants_and_util import *
import matplotlib.pyplot as plt
import pandas as pd
import random
import numpy as np
from copy import deepcopy
from scipy.signal import argrelextrema
import statsmodels.api as sm
from scipy.special import expit
from scipy.stats import scoreatpercentile
import pickle
import os
from collections import Counter
import dataprocessor
import compare_to_seasonal_cycles
assert not USE_SIMULATED_DATA
import sys
from matplotlib.colors import LinearSegmentedColormap
import cPickle
assert sys.version[0] == '2'
import generate_results_for_paper

In [None]:
# Load the results object that the plotting and table cells below take as input.
results = compare_to_seasonal_cycles.load_all_results()

In [None]:
# Robustness check over period regression specifications (going by the helper's
# name; see generate_results_for_paper for what it actually outputs).
generate_results_for_paper.make_period_regression_specifications_robustness_check(results)

# Make four-cycle plots for all symptoms.

In [None]:
# One four-cycle figure per key of `results`, restricted to the
# 'by_very_active_northern_hemisphere_loggers' substratification; each figure is
# written under figures_for_paper/ with the key embedded in the file name.
for opposite_pair in results.keys():
    four_cycle_figname = 'figures_for_paper/four_cycle_plot_%s.png' % opposite_pair
    compare_to_seasonal_cycles.make_four_cycle_plots(
        results,
        ['by_very_active_northern_hemisphere_loggers'],
        [opposite_pair],
        suptitle=False,
        different_colors_for_each_cycle=True,
        figname=four_cycle_figname)

# Alternate four-cycle plot: clock plot. 

These take a while to make. 

In [None]:
# Run this cell if you want the fifteen-cycle clock plot
# (just_plot_single_cycle=False).
generate_results_for_paper.make_clock_plot(results, just_plot_single_cycle=False)

In [None]:
# Run this cell if you want to make a movie (just_plot_single_cycle=True).
# NOTE(review): presumably this emits one frame per cycle -- confirm in make_clock_plot.
generate_results_for_paper.make_clock_plot(results, just_plot_single_cycle=True)

# Symptom tables

In [None]:
# Per the helper's name, this table covers all symptoms, not only those used in
# the analysis. Note that it is called without `results`.
generate_results_for_paper.make_table_of_all_symptoms_not_just_those_in_analysis()

# Table of basic user statistics. 

In [None]:

# n_chunks_to_use=64 is the same value passed to make_period_lengths_plot further
# down; presumably it is the number of data chunks to read -- TODO confirm.
generate_results_for_paper.make_table_of_user_statistics(n_chunks_to_use=64)

# Table of symptoms we actually use. Columns are user counts, symptom count, mean value of symptom. 


In [None]:
# Table of the symptoms used in the analysis, built from `results`.
generate_results_for_paper.make_user_count_table(results)

# Period start for each symptom? 

In [None]:
# NOTE(review): presumably tabulates, per symptom, behaviour near period start --
# confirm against make_near_period_start_table.
generate_results_for_paper.make_near_period_start_table(results)

# Robustness checks for figure 1. 

### Show that amplitudes of cycles don’t change too much for different substratifications

Still need to work on this but preliminary attempt seems promising. 
I think we can probably argue that the basic point of Figure 1 is robust to how many symptoms
are logged, etc.


In [None]:
categories_to_substratify_by = ['by_n_symptom_categories_used', 
                              'by_total_symptoms_logged', 
                  'by_categorical_age',
                  'by_country', 
                  'by_categorical_latitude', 
                  'by_any_filtering']

# identify top 5 countries for robustness check. We only use large countries 
# because otherwise the variance in cycles explodes -> amplitude explodes. 

total_obs_for_countries = {}
for k in results:
    for country in results[k]['by_country']:
        if country not in total_obs_for_countries:
            total_obs_for_countries[country] = 0
        total_obs_for_countries[country] += results[k]['by_country'][country]['overall_n_obs']
top_n_countries = sorted(total_obs_for_countries.keys(), key=lambda x:total_obs_for_countries[x])[::-1][:5]
for country in top_n_countries:
    print country, total_obs_for_countries[country]
        
generate_results_for_paper.make_substratification_robustness_plot(results, 
                                                            categories_to_substratify_by, 
                                                            top_n_countries)


In [None]:
# For every key of `results`, draw four-cycle plots under each substratification.
# Relies on categories_to_substratify_by and top_n_countries from the previous cell.
for opposite_pair in results.keys():
    print("\n\n\n***********%s" % opposite_pair)
    for substratification in categories_to_substratify_by:
        levels = results[opposite_pair][substratification].keys()
        # Only 'by_country' is restricted: every country outside top_n_countries
        # is skipped. All other substratifications plot all of their levels.
        levels_to_skip = ([a for a in levels if a not in top_n_countries]
                          if substratification == 'by_country' else None)
        compare_to_seasonal_cycles.make_four_cycle_plots(
            results,
            [substratification],
            [opposite_pair],
            substratification_levels_to_skip=levels_to_skip,
            suptitle=False)

### Show that amplitudes of cycles don’t change too much for different regression specifications. 

### Different specifications for seasonal cycle

In [None]:
# Robustness check over seasonal-cycle regression specifications, computed from `results`.
generate_results_for_paper.make_seasonal_regression_specifications_robustness_check(results)
                                                                                                    

### Different regression specifications

In [None]:
# Robustness plot over regression specifications, computed from `results`.
generate_results_for_paper.make_regression_specifications_robustness_plot(results)


### Show that mood symptoms still show qualitatively similar patterns when we don't use the opposite trick. 

In [None]:
# Mood-symptom robustness plot without opposite symptoms. Note that it is called
# without `results`.
# NOTE(review): presumably the helper loads its own data -- confirm.
generate_results_for_paper.make_mood_symptoms_robustness_plot_without_opposite_symptoms()

# Robustness checks for figure 2: country-specific individual-effects. 

In [None]:
# Country-specific effects: robustness plot computed from `results`.
generate_results_for_paper.make_country_robustness_plot(results)
                

# Data sanity checks. 

Period lengths look sane. Not just filtering for very active loggers here. 
This also shows that there isn't huge heterogeneity in the period length distribution, which could otherwise be messing things up. 

In [None]:
# n_chunks_to_use=64 is the same value passed to make_table_of_user_statistics
# above; presumably it is the number of data chunks to read -- TODO confirm.
generate_results_for_paper.make_period_lengths_plot(n_chunks_to_use=64)

### Recapitulates known country-specific happiness variation. 

In [None]:
# NOTE(review): this star import repeats the one in the first cell.
# MIN_OBS_FOR_SUBGROUP and MIN_USERS_FOR_SUBGROUP are not defined in this
# notebook, so presumably they come from constants_and_util -- confirm.
from constants_and_util import *
generate_results_for_paper.recapitulate_country_specific_happiness_trends(results, 
                                                                          min_obs=MIN_OBS_FOR_SUBGROUP, 
                                                                          min_users=MIN_USERS_FOR_SUBGROUP)

### Show we can recapitulate worldwide trends for women + obesity

In [None]:
# Uses the same min_obs / min_users thresholds as the happiness-trend check above.
generate_results_for_paper.recapitulate_country_specific_weight_trends(results,
                                                                       min_obs=MIN_OBS_FOR_SUBGROUP, 
                                                                     min_users=MIN_USERS_FOR_SUBGROUP)

                 


### Recapitulates known weekly, seasonal, and menstrual cycles in symptoms we did not examine in the main analysis. 

In [None]:
# Plot of previously known cycles. Note that it is called without `results`.
# NOTE(review): presumably the helper loads its own data -- confirm.
generate_results_for_paper.make_previously_known_cycles_plot()