In [1]:
import pandas as pd

## Preprocessing

- import the data
- convert weather encodings
- make index time-like
- re-order based on time

In [67]:
# import the data
df = pd.read_csv('patterns.csv')

# weather code -> readable label (to make more readable/mimic website formatting)
patterns_dict = {
    'F': 'Clear/Fine',
    'C': 'Sunny',
    'O': 'Cloudy',
    'RC': 'Rain clouds',
    'R': 'Rain',
    'HR': 'Heavy rain'
}

# convert the weather encodings, one column at a time
df = df.apply(lambda codes: codes.map(patterns_dict))

# make the index time-like: each row label gets a ':00' suffix
df.index = [f'{hour}:00' for hour in df.index]

# order the df the same way the game orders it:
# everything after the first five rows comes first, then those five rows
first_five_hours, rest_of_day = list(df.index[:5]), list(df.index[5:])
df = df.loc[rest_of_day + first_five_hours]

df.head(3)

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,...,FineRain00,FineRain01,FineRain02,FineRain03,CloudRain00,CloudRain01,CloudRain02,RainCloud00,RainCloud01,RainCloud02
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,...,Sunny,Sunny,Clear/Fine,Sunny,Rain,Cloudy,Rain,Cloudy,Rain clouds,Rain clouds
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,...,Sunny,Clear/Fine,Clear/Fine,Sunny,Cloudy,Sunny,Cloudy,Rain,Rain,Heavy rain
7:00,Clear/Fine,Sunny,Clear/Fine,Clear/Fine,Sunny,Sunny,Sunny,Cloudy,Cloudy,Rain clouds,...,Clear/Fine,Sunny,Sunny,Sunny,Cloudy,Sunny,Cloudy,Rain,Rain,Heavy rain


## Dropping types that don't exist in-game

Several types are in the data I yanked which don't show up as options on the MeteoNook website - they might be for other things in the game (like Nook Miles islands.)

In particular,

- `Rain__` is not a user-island option
- `FineCloud__` is not a user-island option
- `CloudRain__` is not a user-island option
- `RainCloud__` is not a user-island option

In [68]:
# the rain types aren't found in-game:
rain_types = ['Rain00', 'Rain01', 'Rain02', 'Rain03', 'Rain04', 'Rain05']
finecloud_types = ['FineCloud00', 'FineCloud01', 'FineCloud02']
cloudrain_types = ['CloudRain00', 'CloudRain01', 'CloudRain02']
raincloud_types = ['RainCloud00', 'RainCloud01', 'RainCloud02']

drop_cols = rain_types + finecloud_types + cloudrain_types + raincloud_types

df.drop(columns=drop_cols, inplace=True)
df.shape

(24, 17)

## OK...

My question is basically: assuming uniform probabilities (which might not be the case), is there some set of 2 or 3 hours that is most efficient to check?

Basically, is any set of 2 hours unique? Is any set of 3 hours unique?

I might be able to answer this question using sets, come to think of it.

In [76]:
w = list(patterns_dict.values())

In [69]:
df.head()

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,Rain clouds,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny
7:00,Clear/Fine,Sunny,Clear/Fine,Clear/Fine,Sunny,Sunny,Sunny,Cloudy,Cloudy,Rain clouds,Rain,Cloudy,Rain clouds,Clear/Fine,Sunny,Sunny,Sunny
8:00,Sunny,Sunny,Sunny,Sunny,Sunny,Clear/Fine,Sunny,Cloudy,Sunny,Rain,Cloudy,Cloudy,Rain,Sunny,Sunny,Clear/Fine,Clear/Fine
9:00,Clear/Fine,Sunny,Clear/Fine,Sunny,Cloudy,Sunny,Cloudy,Cloudy,Cloudy,Rain,Rain,Cloudy,Cloudy,Clear/Fine,Clear/Fine,Sunny,Sunny


In [78]:
df.loc[['5:00', '6:00']]

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,Rain clouds,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny


In [101]:
from itertools import combinations, combinations_with_replacement

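There are 21 possible two-hour weather combinations when order is ignored. (It can be sunny two hours in a row, etc.) Note that order does matter when comparing specific hours - `('Clear/Fine', 'Sunny')` is not the same observation as `('Sunny', 'Clear/Fine')` - so there are 6 × 6 = 36 possible ordered pairs.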

In [102]:
len(list(combinations_with_replacement(w, 2)))

21

This generates all possible two-hour combinations and takes the first one:

In [90]:
two_times = list(combinations(df.index, 2))[0]

In [95]:
df.loc[list(two_times)]

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,Rain clouds,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny


I need to get these into tuple pairs and then evaluate if the number of unique tuples is 17 (`len(set(tuples)) == 17`) I think.

Then to pass into `loc` I do need to first cast to a list.

In [124]:
# well well well
list(df.loc[list(two_times)].T.itertuples(name=None, index=False))[:5]

[('Clear/Fine', 'Sunny'),
 ('Sunny', 'Clear/Fine'),
 ('Sunny', 'Sunny'),
 ('Sunny', 'Sunny'),
 ('Sunny', 'Sunny')]

In [126]:
len(set(list(df.loc[list(two_times)].T.itertuples(name=None, index=False))))

10

In [122]:
def evaluate_if_unique(dataframe, times):
    '''
    Check whether the selected times tell every weather pattern apart.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per time label, one column per weather pattern.
    times : iterable
        Row labels (e.g. a tuple of times) to select from ``dataframe``.

    Returns
    -------
    bool
        True if every column shows a different weather combination at the
        selected times (17 unique combos for the 17-column frame used in
        this notebook), False if at least two columns look identical.
    '''

    times = list(times)
    subset = dataframe.loc[times]

    # One tuple per column; the set keeps only the distinct combinations.
    # (Counting the plain list would always give the number of columns.)
    weather_combos = set(subset.T.itertuples(name=None, index=False))

    # Compare against the actual column count rather than a hard-coded 17.
    return len(weather_combos) == subset.shape[1]

Hmm. It would be too easy I guess.

In [131]:
for times in combinations(df.index, 2):
    if evaluate_if_unique(df, times):
        print(times)

In [132]:
all_times_2 = list(combinations(df.index, 2))

In [133]:
all_times_2[0]

('5:00', '6:00')

In [134]:
def df_from_times(dataframe, times):
    '''
    Return the rows of ``dataframe`` labelled by ``times``, in that order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to select rows from.
    times : iterable
        Row labels (e.g. a tuple of times); cast to a list so that ``loc``
        treats it as a collection of labels.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``dataframe``.
    '''
    # Select from the frame that was passed in, not the notebook-level ``df``.
    return dataframe.loc[list(times)]

In [136]:
df_from_times(df, all_times_2[0])

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,Rain clouds,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny


...I have tuples, so...I can make a dictionary!

In [147]:
from collections import Counter, defaultdict

In [152]:
# d = defaultdict(list)

In [140]:
df_from_times(df, all_times_2[0])

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,Rain clouds,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny


In [144]:
set(list(df_from_times(df, all_times_2[0]).T.itertuples(name=None, index=False)))

{('Clear/Fine', 'Clear/Fine'),
 ('Clear/Fine', 'Cloudy'),
 ('Clear/Fine', 'Sunny'),
 ('Cloudy', 'Cloudy'),
 ('Cloudy', 'Rain clouds'),
 ('Rain clouds', 'Rain'),
 ('Sunny', 'Clear/Fine'),
 ('Sunny', 'Cloudy'),
 ('Sunny', 'Rain'),
 ('Sunny', 'Sunny')}

In [146]:
df_from_times(df, all_times_2[0])

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,Rain clouds,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny


I want to know _what combinations occur at what hours, and how often_.

I can...actually probably use a counter for this :thinking:

In [148]:
c = Counter(list(df_from_times(df, all_times_2[0]).T.itertuples(name=None, index=False)))

In [153]:
# for every pair of hours: how often each (weather, weather) combination
# occurs across the pattern columns
d = {
    hour_pair: Counter(
        df_from_times(df, hour_pair).T.itertuples(name=None, index=False)
    )
    for hour_pair in combinations(df.index, 2)
}

In [184]:
pd.DataFrame({str(k): [len(v)] for k, v in d.items()}).T.sort_values(by=0).tail(7)

Unnamed: 0,0
"('6:00', '14:00')",12
"('12:00', '15:00')",12
"('6:00', '13:00')",12
"('9:00', '12:00')",12
"('10:00', '12:00')",12
"('7:00', '13:00')",12
"('6:00', '15:00')",13


In [186]:
times_to_check = pd.DataFrame({str(k): [len(v)] for k, v in d.items()}).T.sort_values(by=0).tail(7).index

In [187]:
times_to_check

Index(['('6:00', '14:00')', '('12:00', '15:00')', '('6:00', '13:00')',
       '('9:00', '12:00')', '('10:00', '12:00')', '('7:00', '13:00')',
       '('6:00', '15:00')'],
      dtype='object')

In [199]:
d[('7:00', '13:00')]

Counter({('Clear/Fine', 'Clear/Fine'): 1,
         ('Clear/Fine', 'Cloudy'): 1,
         ('Clear/Fine', 'Rain'): 1,
         ('Clear/Fine', 'Sunny'): 1,
         ('Cloudy', 'Cloudy'): 1,
         ('Cloudy', 'Sunny'): 2,
         ('Rain', 'Cloudy'): 1,
         ('Rain clouds', 'Rain clouds'): 1,
         ('Rain clouds', 'Sunny'): 1,
         ('Sunny', 'Clear/Fine'): 1,
         ('Sunny', 'Cloudy'): 1,
         ('Sunny', 'Sunny'): 5})

In [200]:
d[('6:00', '15:00')]

Counter({('Clear/Fine', 'Clear/Fine'): 1,
         ('Clear/Fine', 'Heavy rain'): 1,
         ('Clear/Fine', 'Sunny'): 1,
         ('Cloudy', 'Clear/Fine'): 1,
         ('Cloudy', 'Cloudy'): 1,
         ('Cloudy', 'Rain'): 1,
         ('Cloudy', 'Sunny'): 1,
         ('Rain', 'Cloudy'): 1,
         ('Rain', 'Sunny'): 1,
         ('Rain clouds', 'Sunny'): 1,
         ('Sunny', 'Clear/Fine'): 1,
         ('Sunny', 'Rain'): 1,
         ('Sunny', 'Sunny'): 5})

## OK -- my two candidate pairs are...
- `('6:00', '15:00')`
- `('7:00', '13:00')`

`('6:00', '15:00')` has 13 unique combinations and `('7:00', '13:00')` has 12; in both, sunny/sunny shows up 5 times.

### The next question

For sunny/sunny days on my candidate time pairs, **what is the third time I should check** to uniquely identify the weather pattern?

This will be different for each.

### Testing 6 & 15

In [246]:
# I want just the column names where both rows are sunny
pair_6_15 = df_from_times(df, ('6:00', '15:00'))

# one boolean per pattern column, for each of the two checked hours
sunny_6 = pair_6_15.loc['6:00'] == 'Sunny'
sunny_15 = pair_6_15.loc['15:00'] == 'Sunny'

both_sunny_6_15 = sunny_6 & sunny_15
day_types_6_15 = both_sunny_6_15[both_sunny_6_15].index

# remaining hours for those columns; the two checked hours are removed
df_6_15 = df[day_types_6_15].drop(index=['6:00', '15:00'])

In [252]:
list(df_6_15.iterrows())[0]

('5:00', Fine00        Clear/Fine
 Fine02             Sunny
 Fine04             Sunny
 Fine05             Sunny
 FineRain00         Sunny
 Name: 5:00, dtype: object)

In [257]:
len(set(df_6_15.iloc[0].values))

2

In [275]:
# Row positions where the sunny/sunny candidate columns show more than two
# distinct weather types. The range follows the frame's own length instead
# of a hard-coded 22, so it cannot run past the end or stop short.
dates_6_15 = [
    ix
    for ix in range(len(df_6_15))
    if len(set(df_6_15.iloc[ix].values)) > 2
]

In [276]:
df_6_15.iloc[dates_6_15]

Unnamed: 0,Fine00,Fine02,Fine04,Fine05,FineRain00
9:00,Clear/Fine,Clear/Fine,Cloudy,Sunny,Clear/Fine
13:00,Sunny,Clear/Fine,Sunny,Sunny,Rain
14:00,Clear/Fine,Sunny,Cloudy,Cloudy,Sunny
18:00,Sunny,Cloudy,Clear/Fine,Sunny,Sunny
19:00,Clear/Fine,Sunny,Sunny,Cloudy,Clear/Fine


### Testing 7 & 13

In [267]:
# df_from_times(df, ('7:00', '13:00'))

pair_7_13 = df_from_times(df, ('7:00', '13:00'))

# one boolean per pattern column, for each of the two checked hours
sunny_7 = pair_7_13.loc['7:00'] == 'Sunny'
sunny_13 = pair_7_13.loc['13:00'] == 'Sunny'

both_sunny_7_13 = sunny_7 & sunny_13
day_types_7_13 = both_sunny_7_13[both_sunny_7_13].index

# remaining hours for those columns; the two checked hours are removed
df_7_13 = df[day_types_7_13].drop(index=['7:00', '13:00'])

In [268]:
df_7_13.shape

(22, 5)

In [277]:
# Row positions where the sunny/sunny candidate columns show more than three
# distinct weather types. The range follows the frame's own length instead
# of a hard-coded 22, so it cannot run past the end or stop short.
dates_7_13 = [
    ix
    for ix in range(len(df_7_13))
    if len(set(df_7_13.iloc[ix].values)) > 3
]

In [278]:
dates_7_13

[8]

# VICTORY

In [279]:
df_7_13.iloc[dates_7_13]

Unnamed: 0,Fine04,Fine05,Fine06,FineRain02,FineRain03
15:00,Sunny,Sunny,Clear/Fine,Heavy rain,Rain


Process: test 7:00, 13:00, and if both are sunny, check 15:00

If it's sunny again check 16:00

In [280]:
df_7_13

Unnamed: 0,Fine04,Fine05,Fine06,FineRain02,FineRain03
5:00,Sunny,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Sunny,Cloudy,Clear/Fine,Sunny
8:00,Sunny,Clear/Fine,Sunny,Clear/Fine,Clear/Fine
9:00,Cloudy,Sunny,Cloudy,Sunny,Sunny
10:00,Sunny,Sunny,Clear/Fine,Clear/Fine,Clear/Fine
11:00,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine
12:00,Clear/Fine,Sunny,Clear/Fine,Clear/Fine,Sunny
14:00,Cloudy,Cloudy,Sunny,Sunny,Heavy rain
15:00,Sunny,Sunny,Clear/Fine,Heavy rain,Rain
16:00,Clear/Fine,Sunny,Sunny,Cloudy,Sunny


In [195]:
# d

In [175]:
df_from_times(df, ('6:00', '15:00'))

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny
15:00,Sunny,Clear/Fine,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny,Cloudy,Rain,Sunny,Cloudy,Sunny,Sunny,Sunny,Heavy rain,Rain


In [178]:
d[('6:00', '15:00')]

Counter({('Clear/Fine', 'Clear/Fine'): 1,
         ('Clear/Fine', 'Heavy rain'): 1,
         ('Clear/Fine', 'Sunny'): 1,
         ('Cloudy', 'Clear/Fine'): 1,
         ('Cloudy', 'Cloudy'): 1,
         ('Cloudy', 'Rain'): 1,
         ('Cloudy', 'Sunny'): 1,
         ('Rain', 'Cloudy'): 1,
         ('Rain', 'Sunny'): 1,
         ('Rain clouds', 'Sunny'): 1,
         ('Sunny', 'Clear/Fine'): 1,
         ('Sunny', 'Rain'): 1,
         ('Sunny', 'Sunny'): 5})

In [181]:
df_from_times(df, ('6:00', '15:00'))

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny
15:00,Sunny,Clear/Fine,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny,Cloudy,Rain,Sunny,Cloudy,Sunny,Sunny,Sunny,Heavy rain,Rain


In [234]:
# for k, v in d.items():
#     print(k)
#     print(len(v))
#     print()

[('5:00', '6:00'),
 ('5:00', '7:00'),
 ('5:00', '8:00'),
 ('5:00', '9:00'),
 ('5:00', '10:00')]

In [None]:
for times in combinations(df.index, 2):
    pass  # TODO: loop body was never written; placeholder keeps the cell runnable

In [59]:
df.shape

(24, 17)

In [60]:
df.columns

Index(['Fine00', 'Fine01', 'Fine02', 'Fine03', 'Fine04', 'Fine05', 'Fine06',
       'Cloud00', 'Cloud01', 'Cloud02', 'CloudFine00', 'CloudFine01',
       'CloudFine02', 'FineRain00', 'FineRain01', 'FineRain02', 'FineRain03'],
      dtype='object')