In [1]:
import pandas as pd

from collections import Counter, defaultdict
from itertools import combinations, combinations_with_replacement, permutations

## Preprocessing

- import the data
- convert weather encodings
- make index time-like
- re-order based on time

In [2]:
# raw table: one row per hour of the day, one column per weather pattern
df = pd.read_csv('patterns.csv')

# to make more readable/mimic website formatting
patterns_dict = {
    'F': 'Clear/Fine',
    'C': 'Sunny',
    'O': 'Cloudy',
    'RC': 'Rain clouds',
    'R': 'Rain',
    'HR': 'Heavy rain'
}

# swap the short weather codes for their readable labels, column by column
df = df.apply(lambda codes: codes.map(patterns_dict))

# add time formatting: hour number -> 'H:00' label
df.index = [f'{hour}:00' for hour in df.index]

# move the first five hours to the end so the frame starts at 5:00,
# matching the game's treatment of date roll-over
first_five_hours = list(df.index[:5])
rest_of_day = list(df.index[5:])
df = df.loc[rest_of_day + first_five_hours]

df.head(3)

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,...,FineRain00,FineRain01,FineRain02,FineRain03,CloudRain00,CloudRain01,CloudRain02,RainCloud00,RainCloud01,RainCloud02
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,...,Sunny,Sunny,Clear/Fine,Sunny,Rain,Cloudy,Rain,Cloudy,Rain clouds,Rain clouds
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,...,Sunny,Clear/Fine,Clear/Fine,Sunny,Cloudy,Sunny,Cloudy,Rain,Rain,Heavy rain
7:00,Clear/Fine,Sunny,Clear/Fine,Clear/Fine,Sunny,Sunny,Sunny,Cloudy,Cloudy,Rain clouds,...,Clear/Fine,Sunny,Sunny,Sunny,Cloudy,Sunny,Cloudy,Rain,Rain,Heavy rain


## Dropping types that don't exist in-game

Several types are in the data I yanked which don't show up as viable weather pattern options on the MeteoNook website - they might be for other things in the game (like Nook Miles islands.)

In particular,

- `Rain__` is not a user-island option
- `FineCloud__` is not a user-island option
- `CloudRain__` is not a user-island option
- `RainCloud__` is not a user-island option

In [3]:
# column names of the pattern families that are not user-island options
rain_types = [f'Rain{n:02d}' for n in range(6)]
finecloud_types = [f'FineCloud{n:02d}' for n in range(3)]
cloudrain_types = [f'CloudRain{n:02d}' for n in range(3)]
raincloud_types = [f'RainCloud{n:02d}' for n in range(3)]

drop_cols = [*rain_types, *finecloud_types, *cloudrain_types, *raincloud_types]

# keep only the patterns that remain after removing those families
df = df.drop(columns=drop_cols)
df.shape

(24, 17)

## Evaluating optimal hour checks

My question is basically "assuming uniform probabilities for each weather pattern" (which might not be the case) "is there some 2 or 3 hours that are most efficient to check?"

Ideally I'd like to find some 2 or 3 hours which uniquely identify any of the 17 weather patterns.

### Setting up combinations

Here is some code from when I was remembering how `combinations` works.

In [4]:
# generator, so for tinkering chuck to list and index
two_times = list(combinations(df.index, 2))[0]

In [5]:
# need to cast to list to get to work w/ loc
df.loc[list(two_times)]

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,Rain clouds,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny


There are 24 choose 2 combinations of hours to consider:

In [6]:
len(list(combinations(df.index, 2)))

276

How many possible weather combinations are there?

In [7]:
w = list(patterns_dict.values())
w

['Clear/Fine', 'Sunny', 'Cloudy', 'Rain clouds', 'Rain', 'Heavy rain']

In [8]:
len(list(combinations_with_replacement(w, 2)))

21

In [9]:
len(list(permutations(w, 2)))

30

### Is there any combination of 2 hours that will successfully identify weather pattern?

There are 17 weather patterns. If some pair of hours uniquely identifies every weather pattern, the set of (weather, weather) tuples observed at those two hours should contain 17 elements.

In [10]:
two_times

('5:00', '6:00')

In [11]:
# Testing on just one tuple first...
list(df.loc[list(two_times)].T.itertuples(name=None, index=False))[:5]

[('Clear/Fine', 'Sunny'),
 ('Sunny', 'Clear/Fine'),
 ('Sunny', 'Sunny'),
 ('Sunny', 'Sunny'),
 ('Sunny', 'Sunny')]

In [12]:
# 5:00 and 6:00 clearly will not work on their own:
# across all patterns they only produce 10 distinct (5am, 6am) weather pairs
len(set(df.loc[list(two_times)].T.itertuples(name=None, index=False)))

10

Set up a function to evaluate if a given tuple of times results in 17 unique weather patterns...

In [13]:
def evaluate_if_unique(dataframe, times, n_patterns=None):
    '''
    Check whether the weather seen at `times` singles out every pattern.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per hour label, one column per weather pattern.
    times : iterable
        Row labels (hours) to look at, e.g. ('5:00', '6:00').
    n_patterns : int, optional
        Number of distinct weather combos required. Defaults to the number
        of columns in `dataframe`, i.e. every pattern must be distinguishable
        (17 for the notebook's filtered frame).

    Returns
    -------
    bool
        True when the selected hours yield exactly `n_patterns` distinct
        weather combos across the columns.
    '''
    subset = dataframe.loc[list(times)]

    if n_patterns is None:
        # one column per pattern, so full identification needs one combo each
        n_patterns = subset.shape[1]

    # each column of `subset` becomes one tuple of weather readings
    n_weather_combos = len(set(subset.T.itertuples(name=None, index=False)))

    return n_weather_combos == n_patterns

In [14]:
# example
print(two_times)
evaluate_if_unique(df, two_times)

('5:00', '6:00')


False

It doesn't look like there's a pair of hours that works.

In [15]:
# try every pair of hours; print any pair that identifies all patterns
failures = []
for pair in combinations(df.index, 2):
    if not evaluate_if_unique(df, pair):
        failures.append(pair)
        continue
    print(pair)
print(len(failures))

276


Tinkering...

In [16]:
all_times_2 = list(combinations(df.index, 2))
all_times_2[0]

('5:00', '6:00')

In [17]:
def df_from_times(dataframe, times):
    '''
    Return the rows of `dataframe` for the given hour labels.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame indexed by hour label.
    times : iterable
        Row labels to select, e.g. ('5:00', '6:00'). Converted to a list
        because `.loc` treats a tuple as a multi-axis key.

    Returns
    -------
    pandas.DataFrame
        The selected rows, in the order given by `times`.
    '''
    # use the frame that was passed in, not the notebook-level `df`
    return dataframe.loc[list(times)]

In [18]:
df_from_times(df, all_times_2[0])

Unnamed: 0,Fine00,Fine01,Fine02,Fine03,Fine04,Fine05,Fine06,Cloud00,Cloud01,Cloud02,CloudFine00,CloudFine01,CloudFine02,FineRain00,FineRain01,FineRain02,FineRain03
5:00,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Sunny,Sunny,Cloudy,Cloudy,Cloudy,Rain clouds,Sunny,Clear/Fine,Sunny,Sunny,Clear/Fine,Sunny
6:00,Sunny,Clear/Fine,Sunny,Sunny,Sunny,Sunny,Cloudy,Rain clouds,Cloudy,Cloudy,Rain,Rain,Cloudy,Sunny,Clear/Fine,Clear/Fine,Sunny


I want to know _what combinations occur at what hours, and how often_.

In [19]:
c = Counter(list(df_from_times(df, all_times_2[0]).T.itertuples(name=None, index=False)))

In [20]:
# for every pair of hours, tally how many patterns share each
# (weather at first hour, weather at second hour) reading
d = {
    pair: Counter(df_from_times(df, pair).T.itertuples(name=None, index=False))
    for pair in combinations(df.index, 2)
}

In [21]:
pd.DataFrame({str(k): [len(v)] for k, v in d.items()}).T.sort_values(by=0).tail(7)

Unnamed: 0,0
"('6:00', '14:00')",12
"('12:00', '15:00')",12
"('6:00', '13:00')",12
"('9:00', '12:00')",12
"('10:00', '12:00')",12
"('7:00', '13:00')",12
"('6:00', '15:00')",13


In [22]:
# Why cast k to str? pandas treats tuple dict keys as MultiIndex labels, so
# un-cast keys would produce a two-level (hour, hour) index here instead of
# one flat label per pair.
# NOTE(review): presumably that is what went wrong originally -- confirm.
# Because the labels are strings, they cannot be used directly as keys into
# `d`, whose keys are the original tuples.
times_to_check = pd.DataFrame({str(k): [len(v)] for k, v in d.items()}).T.sort_values(by=0).tail(7).index

In [23]:
times_to_check

Index(['('6:00', '14:00')', '('12:00', '15:00')', '('6:00', '13:00')',
       '('9:00', '12:00')', '('10:00', '12:00')', '('7:00', '13:00')',
       '('6:00', '15:00')'],
      dtype='object')

In [24]:
d[('7:00', '13:00')]

Counter({('Clear/Fine', 'Clear/Fine'): 1,
         ('Clear/Fine', 'Cloudy'): 1,
         ('Clear/Fine', 'Rain'): 1,
         ('Clear/Fine', 'Sunny'): 1,
         ('Cloudy', 'Cloudy'): 1,
         ('Cloudy', 'Sunny'): 2,
         ('Rain', 'Cloudy'): 1,
         ('Rain clouds', 'Rain clouds'): 1,
         ('Rain clouds', 'Sunny'): 1,
         ('Sunny', 'Clear/Fine'): 1,
         ('Sunny', 'Cloudy'): 1,
         ('Sunny', 'Sunny'): 5})

In [25]:
d[('6:00', '15:00')]

Counter({('Clear/Fine', 'Clear/Fine'): 1,
         ('Clear/Fine', 'Heavy rain'): 1,
         ('Clear/Fine', 'Sunny'): 1,
         ('Cloudy', 'Clear/Fine'): 1,
         ('Cloudy', 'Cloudy'): 1,
         ('Cloudy', 'Rain'): 1,
         ('Cloudy', 'Sunny'): 1,
         ('Rain', 'Cloudy'): 1,
         ('Rain', 'Sunny'): 1,
         ('Rain clouds', 'Sunny'): 1,
         ('Sunny', 'Clear/Fine'): 1,
         ('Sunny', 'Rain'): 1,
         ('Sunny', 'Sunny'): 5})

## OK -- my two candidate pairs are...
- `('6:00', '15:00')`
- `('7:00', '13:00')`

They have 13 and 12 unique combinations respectively, and in both cases sunny/sunny shows up 5 times.

### The next question

For sunny/sunny days on each of my candidate time pairs, **what is the third time I should check** to uniquely identify the weather pattern?

This will be different for each.

### Testing 6 & 15

In [26]:
# keep only the patterns (columns) that read Sunny at both 6:00 and 15:00
pair_6_15 = df_from_times(df, ('6:00', '15:00'))

sunny_6 = pair_6_15.loc['6:00'] == 'Sunny'
sunny_15 = pair_6_15.loc['15:00'] == 'Sunny'

both_sunny_6_15 = sunny_6 & sunny_15
day_types_6_15 = both_sunny_6_15[both_sunny_6_15].index

# every other hour of those patterns is a candidate for the third check
df_6_15 = df[day_types_6_15].drop(['6:00', '15:00'])

In [27]:
list(df_6_15.iterrows())[0]

('5:00', Fine00        Clear/Fine
 Fine02             Sunny
 Fine04             Sunny
 Fine05             Sunny
 FineRain00         Sunny
 Name: 5:00, dtype: object)

In [28]:
len(set(df_6_15.iloc[0].values))

2

In [29]:
# row positions (hours) at which the candidate patterns show more than two
# distinct weather states; iterate over the frame's actual length rather
# than a hard-coded row count
dates_6_15 = [
    ix for ix in range(len(df_6_15))
    if len(set(df_6_15.iloc[ix].values)) > 2
]

In [30]:
df_6_15.iloc[dates_6_15]

Unnamed: 0,Fine00,Fine02,Fine04,Fine05,FineRain00
9:00,Clear/Fine,Clear/Fine,Cloudy,Sunny,Clear/Fine
13:00,Sunny,Clear/Fine,Sunny,Sunny,Rain
14:00,Clear/Fine,Sunny,Cloudy,Cloudy,Sunny
18:00,Sunny,Cloudy,Clear/Fine,Sunny,Sunny
19:00,Clear/Fine,Sunny,Sunny,Cloudy,Clear/Fine


### Testing 7 & 13

In [31]:
# keep only the patterns (columns) that read Sunny at both 7:00 and 13:00
pair_7_13 = df_from_times(df, ('7:00', '13:00'))

sunny_7 = pair_7_13.loc['7:00'] == 'Sunny'
sunny_13 = pair_7_13.loc['13:00'] == 'Sunny'

both_sunny_7_13 = sunny_7 & sunny_13
day_types_7_13 = both_sunny_7_13[both_sunny_7_13].index

# every other hour of those patterns is a candidate for the third check
df_7_13 = df[day_types_7_13].drop(['7:00', '13:00'])

In [32]:
df_7_13.shape

(22, 5)

In [33]:
# row positions (hours) at which the candidate patterns show more than two
# distinct weather states; iterate over the frame's actual length rather
# than a hard-coded row count
dates_7_13 = [
    ix for ix in range(len(df_7_13))
    if len(set(df_7_13.iloc[ix].values)) > 2
]

In [34]:
dates_7_13

[1, 7, 8, 9, 10, 11, 12, 17]

In [35]:
dates_7_13

[1, 7, 8, 9, 10, 11, 12, 17]

In [36]:
# tighten the filter: row positions (hours) at which the candidate patterns
# show more than three distinct weather states; iterate over the frame's
# actual length rather than a hard-coded row count
dates_7_13 = [
    ix for ix in range(len(df_7_13))
    if len(set(df_7_13.iloc[ix].values)) > 3
]

In [37]:
dates_7_13

[8]

In [38]:
# if sunny at 15:00 still haven't confirmed what pattern
df_7_13.iloc[8]

Fine04             Sunny
Fine05             Sunny
Fine06        Clear/Fine
FineRain02    Heavy rain
FineRain03          Rain
Name: 15:00, dtype: object

In [39]:
# but checking 16:00 should fix that
df_7_13.iloc[[8, 9]]

Unnamed: 0,Fine04,Fine05,Fine06,FineRain02,FineRain03
15:00,Sunny,Sunny,Clear/Fine,Heavy rain,Rain
16:00,Clear/Fine,Sunny,Sunny,Cloudy,Sunny


# Conclusion

- Test 7:00 and 13:00. Often the weather at just those two hours will identify the day's pattern.
- If 7:00 and 13:00 are both sunny, check 15:00.
- If 15:00 is sunny, check 16:00.

This may not be the _maximally_ efficient way to hone in on a weather seed using MeteoNook, but it worked well for me. I used a combination of data collected while playing the game normally & 