In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt

In [2]:
# Load the data, keeping only the three columns used in this notebook
df = pd.read_csv('datasets/FilteredData.csv')[['ClientID','CycleNumber','LengthofCycle']]
# Every recorded cycle length is increased by one
df.loc[:, 'LengthofCycle'] = df['LengthofCycle'] + 1

# Row index at which each run of identical ClientID values starts
change_index = df.index[df['ClientID'].ne(df['ClientID'].shift())].to_list()
# Closing boundary for the final run.
# NOTE(review): this is the index of the last row, so a loop written as
# `i < finish` never visits the last row of the frame - confirm this is intended.
change_index.append(len(df)-1)
# Pair consecutive boundaries into (start, finish) tuples, one per run
change_indexes = list(zip(change_index[:-1], change_index[1:]))

In [8]:
# Display the loaded frame
df

Unnamed: 0,ClientID,CycleNumber,LengthofCycle
0,nfp8122,1,30
1,nfp8122,2,28
2,nfp8122,3,30
3,nfp8122,4,28
4,nfp8122,5,29
...,...,...,...
1549,nfp8334,7,30
1550,nfp8334,8,29
1551,nfp8334,9,29
1552,nfp8334,10,41


In [10]:
# Group the cycle columns by client; no aggregation is applied, so this only builds the GroupBy object
df.groupby('ClientID')[['CycleNumber','LengthofCycle']]

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001E5C49AC790>

In [3]:
# Functions for finding the row indexes at which each type of menstruation pattern is fixed

def hafalga_initial_fixing_indexes(lengths, segments=None):
    """Return the indexes at which a run of three equal cycle lengths is completed.

    Inside every segment, index ``i`` is reported when ``lengths[i]`` equals
    both ``lengths[i-1]`` and ``lengths[i-2]``. The rest of that run of equal
    values is then skipped, so one run yields one index.

    Parameters
    ----------
    lengths : indexable
        Cycle lengths, read as ``lengths[i]`` (a list, or a Series whose
        labels are the integers used in ``segments``).
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    fixing_indexes = []
    for start, finish in segments:
        i = start + 2
        while i < finish:
            if lengths[i] == lengths[i-1] and lengths[i] == lengths[i-2]:
                fixing_indexes.append(i)
                # Skip the remainder of the run. The look-ahead is bounded by
                # i + 1 so it never reads an element outside this segment.
                while i + 1 < finish and lengths[i+1] == lengths[i]:
                    i += 1
            i += 1
    return fixing_indexes

def dilug_initial_fixing_indexes(lengths, segments=None):
    """Return the indexes at which a constant, non-zero step between cycle lengths is completed.

    Inside every segment, index ``i`` is reported when
    ``lengths[i] - lengths[i-1]`` equals ``lengths[i-1] - lengths[i-2]`` and
    that difference is not zero. Following elements that continue the same
    step are skipped, so one progression yields one index.

    Parameters
    ----------
    lengths : indexable
        Cycle lengths, read as ``lengths[i]`` (a list, or a Series whose
        labels are the integers used in ``segments``).
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    fixing_indexes = []
    for start, finish in segments:
        i = start + 2
        while i < finish:
            step = lengths[i] - lengths[i-1]
            if step == lengths[i-1] - lengths[i-2] and step != 0:
                fixing_indexes.append(i)
                # Skip the remainder of the progression. The look-ahead is bounded
                # by i + 1 so it never reads an element outside this segment.
                while i + 1 < finish and lengths[i+1] - lengths[i] == lengths[i] - lengths[i-1]:
                    i += 1
            i += 1
    return fixing_indexes

def week_initial_fixing_indexes(lengths, segments=None, week_lengths=None):
    """Return the indexes at which a repeated "week" cycle length is completed.

    Inside every segment, index ``i`` is reported when ``lengths[i]`` is one
    of ``week_lengths`` and equals ``lengths[i-1]``. The rest of that run of
    equal values is then skipped, so one run yields one index.

    Parameters
    ----------
    lengths : indexable
        Cycle lengths, read as ``lengths[i]`` (a list, or a Series whose
        labels are the integers used in ``segments``).
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.
    week_lengths : container of int, optional
        Cycle lengths that qualify. By default these are the values
        ``min + 3, min + 10, ...`` up to ``max`` of the notebook-level
        ``df['LengthofCycle']``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    if week_lengths is None:
        all_lengths = df['LengthofCycle']
        week_lengths = list(range(all_lengths.min() + 3, all_lengths.max() + 1, 7))
    fixing_indexes = []
    for start, finish in segments:
        i = start + 1
        while i < finish:
            if lengths[i] in week_lengths and lengths[i] == lengths[i-1]:
                fixing_indexes.append(i)
                # Skip the remainder of the run. The look-ahead is bounded by
                # i + 1 so it never reads an element outside this segment.
                while i + 1 < finish and lengths[i+1] == lengths[i]:
                    i += 1
            i += 1
    return fixing_indexes

def week_dilug_initial_fixing_indexes(lengths, segments=None, week_lengths=None):
    """Return the indexes at which a repeated cycle length from ``week_lengths`` is completed.

    Inside every segment, index ``i`` is reported when ``lengths[i]`` is one
    of ``week_lengths`` and equals ``lengths[i-1]``. The rest of that run of
    equal values is then skipped, so one run yields one index.

    Parameters
    ----------
    lengths : indexable
        Cycle lengths, read as ``lengths[i]`` (a list, or a Series whose
        labels are the integers used in ``segments``).
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.
    week_lengths : container of int, optional
        Cycle lengths that qualify. Defaults to ``[30]``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    if week_lengths is None:
        week_lengths = [30]
    fixing_indexes = []
    for start, finish in segments:
        i = start + 1
        while i < finish:
            if lengths[i] in week_lengths and lengths[i] == lengths[i-1]:
                fixing_indexes.append(i)
                # Skip the remainder of the run. The look-ahead is bounded by
                # i + 1 so it never reads an element outside this segment.
                while i + 1 < finish and lengths[i+1] == lengths[i]:
                    i += 1
            i += 1
    return fixing_indexes

def dilug_in_dilug_initial_fixing_indexes(lengths, segments=None):
    """Return the indexes at which a constant, non-zero second difference is completed.

    Inside every segment, the three consecutive differences ending at index
    ``i`` are taken; ``i`` is reported when the change between the first two
    equals the change between the last two and is not zero. Following
    elements that keep extending the same pattern are skipped, so one
    pattern yields one index.

    Parameters
    ----------
    lengths : indexable
        Cycle lengths, read as ``lengths[i]`` (a list, or a Series whose
        labels are the integers used in ``segments``).
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    fixing_indexes = []
    for start, finish in segments:
        i = start + 3
        while i < finish:
            # First differences of the last four values ...
            b1 = lengths[i-2] - lengths[i-3]
            b2 = lengths[i-1] - lengths[i-2]
            b3 = lengths[i] - lengths[i-1]
            # ... and the two second differences between them
            d1, d2 = b2 - b1, b3 - b2
            if d1 == d2 and d1 != 0:
                fixing_indexes.append(i)
                b, d = b3, d2
                # The next value continues the pattern when its step is the
                # previous step plus the constant second difference.
                while i + 1 < finish and lengths[i+1] == lengths[i] + b + d:
                    i += 1
                    b += d
            i += 1
    return fixing_indexes

In [44]:
# Label each cycle with the pattern that is fixed on it. The assignments run in
# order, so a later pattern overwrites an earlier one on the same row.
df['FixingMenstruation'] = 'NotFixed'
df.loc[hafalga_initial_fixing_indexes(df['LengthofCycle']), 'FixingMenstruation'] = 'Haflaga'
df.loc[dilug_initial_fixing_indexes(df['LengthofCycle']), 'FixingMenstruation'] = 'Dilug'
df.loc[week_initial_fixing_indexes(df['LengthofCycle']), 'FixingMenstruation'] = 'Week'
df.loc[week_dilug_initial_fixing_indexes(df['LengthofCycle']), 'FixingMenstruation'] = 'Week_Dilug'
df.loc[dilug_in_dilug_initial_fixing_indexes(df['LengthofCycle']), 'FixingMenstruation'] = 'Dilug_in_Dilug'

# Each cycle takes the label of the previous cycle of the SAME client. Shifting
# per client keeps the last row of one client out of the first row of the next.
df['FixedMenstruationType'] = df.groupby('ClientID')['FixingMenstruation'].shift().fillna('NotFixed')
df['Forecast_fixed'] = np.nan


In [41]:
# Functions for filling the FixedMenstruationType column

def hafalga_fill(lengths=None, frame=None, segments=None):
    """Write the Haflaga forecast into ``frame`` for cycles that follow three equal lengths.

    Inside every segment, when ``lengths[i]`` equals the two preceding
    values, that length becomes the forecast. The following rows get
    ``FixedMenstruationType = 'Haflaga'`` and ``Forecast_fixed`` set to the
    forecast. Propagation continues while the current row's
    ``FixingMenstruation`` is ``'NotFixed'`` and fewer than three
    consecutive cycles have differed from the forecast.

    Parameters
    ----------
    lengths : indexable, optional
        Cycle lengths, read as ``lengths[i]``. Defaults to a copy of
        ``frame['LengthofCycle']`` taken when the function is called.
    frame : pandas.DataFrame, optional
        Frame that is read and modified in place; needs the columns
        ``LengthofCycle``, ``FixingMenstruation``, ``FixedMenstruationType``
        and ``Forecast_fixed``. Defaults to the notebook-level ``df``.
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.

    Returns
    -------
    list
        Always empty; nothing is ever appended to it. The result of the call
        is the in-place modification of ``frame``.
    """
    # Defaults are resolved at call time so the current frame is used,
    # not whatever existed when this cell was defined.
    if frame is None:
        frame = df
    if lengths is None:
        lengths = frame['LengthofCycle'].copy()
    if segments is None:
        segments = change_indexes
    fixing_indexes = []
    for start, finish in segments:
        i = start + 2
        while i < finish:
            if lengths[i] == lengths[i-1] and lengths[i] == lengths[i-2]:
                cycles_from_fixing = 0
                fixed_cycle = lengths[i]
                # Propagate the forecast to the following rows of this segment
                while (i + 1 < finish and cycles_from_fixing < 3
                       and frame.at[i, 'FixingMenstruation'] == 'NotFixed'):
                    i += 1
                    frame.at[i, 'FixedMenstruationType'] = 'Haflaga'
                    frame.at[i, 'Forecast_fixed'] = fixed_cycle
                    if lengths[i] == fixed_cycle:
                        # A matching cycle resets the count of misses
                        cycles_from_fixing = 0
                    else:
                        cycles_from_fixing += 1
            i += 1
    return fixing_indexes

def dilug_fill(df, segments=None):
    """Return the indexes at which a constant, non-zero step between cycle lengths is completed.

    Despite its name this function does not modify ``df``; it only scans
    ``df['LengthofCycle']``. Inside every segment, index ``i`` is reported
    when the step into ``i`` equals the step before it and is not zero.
    Following elements that continue the same step are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``LengthofCycle`` column whose index labels are the
        integers used in ``segments``.
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    fixing_indexes = []
    lengths = df['LengthofCycle']
    for start, finish in segments:
        i = start + 2
        while i < finish:
            step = lengths[i] - lengths[i-1]
            if step == lengths[i-1] - lengths[i-2] and step != 0:
                fixing_indexes.append(i)
                # Skip the remainder of the progression. The look-ahead is bounded
                # by i + 1 so it never reads a row outside this segment.
                while i + 1 < finish and lengths[i+1] - lengths[i] == lengths[i] - lengths[i-1]:
                    i += 1
            i += 1
    return fixing_indexes

def week_fill(df, segments=None):
    """Return the indexes at which a repeated "week" cycle length is completed.

    Despite its name this function does not modify ``df``; it only scans
    ``df['LengthofCycle']``. A length qualifies when it is one of
    ``min + 3, min + 10, ...`` up to ``max`` of that column. Inside every
    segment, index ``i`` is reported when ``lengths[i]`` qualifies and equals
    ``lengths[i-1]``; the rest of that run of equal values is skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``LengthofCycle`` column whose index labels are the
        integers used in ``segments``.
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    # The original body used `lengths` without ever defining it
    lengths = df['LengthofCycle']
    week_lengths = list(range(lengths.min() + 3, lengths.max() + 1, 7))
    fixing_indexes = []
    for start, finish in segments:
        i = start + 1
        while i < finish:
            if lengths[i] in week_lengths and lengths[i] == lengths[i-1]:
                fixing_indexes.append(i)
                # Skip the remainder of the run. The look-ahead is bounded by
                # i + 1 so it never reads a row outside this segment.
                while i + 1 < finish and lengths[i+1] == lengths[i]:
                    i += 1
            i += 1
    return fixing_indexes

def week_dilug_fill(df, segments=None):
    """Return the indexes at which a repeated cycle length of 30 is completed.

    Despite its name this function does not modify ``df``; it only scans
    ``df['LengthofCycle']``. Inside every segment, index ``i`` is reported
    when ``lengths[i]`` is 30 and equals ``lengths[i-1]``; the rest of that
    run of equal values is skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``LengthofCycle`` column whose index labels are the
        integers used in ``segments``.
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    # The original body used `lengths` without ever defining it
    lengths = df['LengthofCycle']
    week_lengths = [30]
    fixing_indexes = []
    for start, finish in segments:
        i = start + 1
        while i < finish:
            if lengths[i] in week_lengths and lengths[i] == lengths[i-1]:
                fixing_indexes.append(i)
                # Skip the remainder of the run. The look-ahead is bounded by
                # i + 1 so it never reads a row outside this segment.
                while i + 1 < finish and lengths[i+1] == lengths[i]:
                    i += 1
            i += 1
    return fixing_indexes

def dilug_in_dilug_fill(df, segments=None):
    """Return the indexes at which a constant, non-zero second difference is completed.

    Despite its name this function does not modify ``df``; it only scans
    ``df['LengthofCycle']``. Inside every segment, the three consecutive
    differences ending at index ``i`` are taken; ``i`` is reported when the
    change between the first two equals the change between the last two and
    is not zero. Following rows that keep extending the pattern are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``LengthofCycle`` column whose index labels are the
        integers used in ``segments``.
    segments : iterable of (start, finish), optional
        Index ranges to scan; ``finish`` is exclusive. Defaults to the
        notebook-level ``change_indexes``.

    Returns
    -------
    list of int
        The fixing indexes, in scan order.
    """
    if segments is None:
        segments = change_indexes
    # The original body used `lengths` without ever defining it
    lengths = df['LengthofCycle']
    fixing_indexes = []
    for start, finish in segments:
        i = start + 3
        while i < finish:
            # First differences of the last four values ...
            b1 = lengths[i-2] - lengths[i-3]
            b2 = lengths[i-1] - lengths[i-2]
            b3 = lengths[i] - lengths[i-1]
            # ... and the two second differences between them
            d1, d2 = b2 - b1, b3 - b2
            if d1 == d2 and d1 != 0:
                fixing_indexes.append(i)
                b, d = b3, d2
                # The next value continues the pattern when its step is the
                # previous step plus the constant second difference.
                while i + 1 < finish and lengths[i+1] == lengths[i] + b + d:
                    i += 1
                    b += d
            i += 1
    return fixing_indexes

In [45]:
# Run the Haflaga fill (its return value is discarded), then show one client's rows
hafalga_fill()
df.loc[df['ClientID'].eq('nfp8045')]

Unnamed: 0,ClientID,CycleNumber,LengthofCycle,FixingMenstruation,FixedMenstruationType,Forecast_fixed
167,nfp8045,1,31,NotFixed,NotFixed,
168,nfp8045,2,28,NotFixed,NotFixed,
169,nfp8045,3,32,NotFixed,NotFixed,
170,nfp8045,4,30,NotFixed,NotFixed,
171,nfp8045,5,30,Week_Dilug,NotFixed,
172,nfp8045,6,30,Haflaga,Week_Dilug,
173,nfp8045,7,30,NotFixed,Haflaga,
174,nfp8045,8,32,NotFixed,Haflaga,30.0
175,nfp8045,9,34,Dilug,Haflaga,30.0
176,nfp8045,10,32,NotFixed,Dilug,


In [20]:
# List the distinct client identifiers
df['ClientID'].unique()

array(['nfp8122', 'nfp8024', 'nfp8020', 'nfp8026', 'nfp8030', 'nfp8031',
       'nfp8032', 'nfp8036', 'nfp8040', 'nfp8041', 'nfp8042', 'nfp8043',
       'nfp8045', 'nfp8046', 'nfp8051', 'nfp8057', 'nfp8058', 'nfp8060',
       'nfp8062', 'nfp8064', 'nfp8066', 'nfp8068', 'nfp8072', 'nfp8073',
       'nfp8076', 'nfp8079', 'nfp8080', 'nfp8083', 'nfp8087', 'nfp8091',
       'nfp8094', 'nfp8099', 'nfp8100', 'nfp8101', 'nfp8102', 'nfp8107',
       'nfp8113', 'nfp8116', 'nfp8123', 'nfp8124', 'nfp8129', 'nfp8131',
       'nfp8133', 'nfp8137', 'nfp8140', 'nfp8143', 'nfp8149', 'nfp8150',
       'nfp8152', 'nfp8154', 'nfp8155', 'nfp8159', 'nfp8161', 'nfp8164',
       'nfp8165', 'nfp8168', 'nfp8172', 'nfp8173', 'nfp8174', 'nfp8176',
       'nfp8177', 'nfp8178', 'nfp8179', 'nfp8184', 'nfp8186', 'nfp8187',
       'nfp8188', 'nfp8190', 'nfp8193', 'nfp8195', 'nfp8196', 'nfp8197',
       'nfp8211', 'nfp8212', 'nfp8221', 'nfp8223', 'nfp8228', 'nfp8234',
       'nfp8235', 'nfp8237', 'nfp8238', 'nfp8240', 