In [None]:
import matplotlib.pyplot as plt
import numpy as np
from get_cursor import get_cursor
import statements

In [None]:
# Open the SQLite database. get_cursor is a project helper; it is used here
# as returning a (cursor, connection) pair -- TODO confirm against get_cursor.
curs, db = get_cursor('../me.db')

# `pragma table_info` yields one row per column of the table; the column
# name is the second field of each row.
info = curs.execute('pragma table_info(Daily_Functionality);').fetchall()
labels = [x[1] for x in info]

# columns maps each column name to the list of (Date, value) tuples returned
# by the query below, one tuple per table row:
# columns[key] = [ (<date>, <value>), ... ]
# (the original note says 420+ rows of 2-string tuples; not verifiable here)
columns = {}
for a in labels:
    # Quote the identifier (doubling any embedded double quote) so column
    # names containing spaces or keywords still form a valid statement.
    quoted = '"' + a.replace('"', '""') + '"'
    columns[a] = curs.execute('select Date, ' + quoted + ' from Daily_Functionality;').fetchall()

In [None]:
# Information regarding the variability of each column in this cell:
# every column whose values can be parsed as floats is replaced in `columns`
# by a 1-D numpy array, and its average / variance / standard deviation are
# printed. Columns with fewer than two numeric values are left untouched.
dates = [d[0] for d in columns['Date']]
for k in columns.keys():
    if isinstance(columns[k], np.ndarray):
        # Already converted by an earlier run of this cell; reuse the array
        # instead of indexing into its float scalars (which would raise).
        r = columns[k]
    else:
        temp_cols = []
        for row in columns[k]:
            try:
                temp_cols.append(float(row[1]))
            except (TypeError, ValueError):
                # Missing or non-numeric value: the row is dropped.
                # NOTE(review): dropping shortens the array, so positions in
                # it no longer line up with `dates` for such columns.
                pass
        # Explicit guard (previously an assert on temp_cols[1]): skip columns
        # that produced fewer than two numeric values.
        if len(temp_cols) < 2:
            continue
        r = np.array(temp_cols)
    print(k + ': ')
    #fig = plt.figure()
    #ax = fig.add_axes([0, 0, len(dates)/40, 4])
    #ax.plot(list(range(len(dates))), r)
    #ax.set_title(k)
    #plt.show()
    print('\tAverage:', str(np.average(r)))
    print('\tVariance:', str(r.var()))
    print('\tStandard deviation:', str(r.std()))
    # Replace the raw (Date, value) tuples with the numeric array.
    columns[k] = r

In [None]:
# Build list of factors to find relationships for
# to find how factor affects cofactor up to 31 days in the future
# and how it is affected by cofactor up to 31 days in the past
#
# Result: relationships[factor][cofactor] is a list with one correlation
# coefficient per time_delta in range(min_, max_+1), in that order.
#
# NOTE(review): factors are picked by position in `labels`, so the selection
# depends on the table's column order -- confirm these are the intended ones.
factors = labels[9:20]
factors.append(labels[5])

# the time range, in days; defined once here (a later plotting cell reads
# min_ and max_ as well)
min_ = -31
max_ = 31

relationships = {}
for factor in factors:
    # a dictionary of dictionaries
    relationships[factor] = {}
    for cofactor in columns.keys():
        if cofactor in (factor, 'Over_Extending', 'Notes', 'Date', 'Weekday'):
            continue
        # with each sub-dictionary referring to a list of correlation coefficients
        relationships[factor][cofactor] = []
        for time_delta in range(min_, max_+1):
            lag = abs(time_delta)
            # for every day of distance we shorten the data we use from
            # both columns by `lag` values
            if time_delta < 0:
                # negative delta: the last `lag` cofactor values are excluded
                # and the first `lag` factor values are excluded, so each
                # cofactor value is paired with the factor value `lag`
                # positions later (factor is the dependent variable)
                vals = np.asarray(columns[cofactor][:-lag])
                factor_vals = columns[factor][lag:]
            elif time_delta == 0:
                # no shift: both columns are used in full
                vals = np.asarray(columns[cofactor])
                factor_vals = columns[factor][0:]
            else:
                # positive delta: the first `lag` cofactor values are excluded
                # and the last `lag` factor values are excluded, so each
                # factor value is paired with the cofactor value `lag`
                # positions later (cofactor is the dependent variable)
                vals = np.asarray(columns[cofactor][time_delta:])
                factor_vals = columns[factor][:-time_delta]
            if len(factor_vals) != len(vals):
                # np.corrcoef would fail here with a less helpful message
                raise ValueError(
                    'columns %r and %r have different lengths (%d vs %d) at time_delta %d'
                    % (factor, cofactor, len(factor_vals), len(vals), time_delta))
            correlation = np.corrcoef(factor_vals, vals)[0][1]
            relationships[factor][cofactor].append(correlation)


In [None]:
# Report every coefficient strictly between 0.2 and 0.3. The factor/cofactor
# header is printed once, just before the first match of that pair; the
# reported index is the position in the coefficient list.
for factor, by_cofactor in relationships.items():
    for cofactor, coefficients in by_cofactor.items():
        matches = [
            (position, coefficient)
            for position, coefficient in enumerate(coefficients)
            if .2 < coefficient < .3
        ]
        if not matches:
            continue
        print('factor:', factor)
        print('cofactor:', cofactor)
        for position, coefficient in matches:
            print('index, value:', str(position), coefficient)


In [None]:
"""
for factor in relationships.keys():
    num = 0
    for cofactor in relationships[factor]:
        fig = plt.figure()
        ax = fig.add_axes([0, 0, max_/5, 1])
        ax.set_title('Correlation Over Time')
        ax.set_xlabel('Days Between Cofactor ' + cofactor + ' and ' + factor)
        ax.set_ylabel('Correlation Coefficient * 100:')
        try:
            thyme = list(range(min_, max_+1))
            assert thyme == len(relationships[factor][cofactor])
        except AssertionError as e:
            print(e)
            print('thyme not equal to length of relationships[factor][cofactor]')
            print('len(thyme):', len(thyme))
            print('len(relationships[factor][cofactor]):', len(relationships[factor][cofactor]))
        values = [100*value for value in relationships[factor][cofactor]]
        ax.plot(thyme, relationships[factor][cofactor])
        plt.savefig()
"""
# Plot the lagged correlation curve for a single factor/cofactor pair.
# Requires `relationships`, `min_` and `max_` from the correlation cell.
factor = 'Total_Vices'
cofactor = 'Total_Work'
fig = plt.figure()
# add_axes takes [left, bottom, width, height]; the width grows with max_.
ax = fig.add_axes([0, 0, max_/5, 1])
# Title is built from the variables so it cannot drift from the plotted pair.
ax.set_title('Factor: ' + factor + '; Cofactor: ' + cofactor)
ax.set_xlabel('Days between Factor and Cofactor')
ax.set_ylabel('Correlation Coefficient * 100')
# x axis: one entry per time_delta, matching the order of the coefficients
thyme = list(range(min_, max_+1))
values = [100*value for value in relationships[factor][cofactor]]
# trailing semicolon keeps the Line2D repr out of the cell output
ax.plot(thyme, values);


In [None]:
# Section on Fourier transforms, to find out what my "cycle" really is.

In [None]:
# Release the database handles: cursor first, then the connection.
# No commit is performed; the call below was left disabled.
#db.commit()
curs.close()
db.close()