In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import MultiLabelBinarizer

pd.options.display.max_rows = 200

In [2]:
bugs = pd.read_csv('acnh_bugs.csv')
fish = pd.read_csv('acnh_fish.csv')

In [3]:
bugs['Category'] = 'Bugs'
fish['Category'] = 'Fish'

bugs.rename(columns={'Months: North Hem/South Hem':'Months'}, 
            inplace=True)
fish.rename(columns={'Months: North Hem/South Hem':'Months'}, 
            inplace=True)

In [4]:
bugs.head()

Unnamed: 0,Name,Price,Location,Time,Months,Category
0,Common Butterfly,160,Flowers,4am-7pm,Sept-Jun / Mar-Dec,Bugs
1,Yellow Butterly,160,Flying,4am-7pm,Mar-Oct / Mar-Dec,Bugs
2,Tiger Butterfly,240,Flying,4am-7pm,Mar-Sept / Sept-Mar,Bugs
3,Peacock Butterfly,2500,Rare flowers,4am-7pm,Mar-Jun / Sept-Dec,Bugs
4,Common Bluebottle,300,Flying,4am-7pm,Apr-Aug / Oct-Feb,Bugs


In [5]:
fish.head()

Unnamed: 0,Name,Price,Location,Time,Months,Category
0,Bitterling,900,River,All,Nov-Mar / May-Sept,Fish
1,Pale Chub,200,River,9am-4pm,All / All,Fish
2,Crucian Carp,160,River,All,All / All,Fish
3,Dace,240,River,4pm-9am,All / All,Fish
4,Carp,300,Pond,All,All / All,Fish


In [6]:
full_db = bugs.append(fish, sort=False)

In [7]:
full_db.head()

Unnamed: 0,Name,Price,Location,Time,Months,Category
0,Common Butterfly,160,Flowers,4am-7pm,Sept-Jun / Mar-Dec,Bugs
1,Yellow Butterly,160,Flying,4am-7pm,Mar-Oct / Mar-Dec,Bugs
2,Tiger Butterfly,240,Flying,4am-7pm,Mar-Sept / Sept-Mar,Bugs
3,Peacock Butterfly,2500,Rare flowers,4am-7pm,Mar-Jun / Sept-Dec,Bugs
4,Common Bluebottle,300,Flying,4am-7pm,Apr-Aug / Oct-Feb,Bugs


In [8]:
months_db = full_db['Months'].str.split(' ', n=-1, expand=True)

In [9]:
months_db.head()

Unnamed: 0,0,1,2
0,Sept-Jun,/,Mar-Dec
1,Mar-Oct,/,Mar-Dec
2,Mar-Sept,/,Sept-Mar
3,Mar-Jun,/,Sept-Dec
4,Apr-Aug,/,Oct-Feb


In [10]:
months_db['North_Months'] = months_db[0]
months_db['South_Months'] = months_db[2]

full_db['North_Months'] = months_db[0]
full_db['South_Months'] = months_db[2]

In [11]:
months_db.head()

Unnamed: 0,0,1,2,North_Months,South_Months
0,Sept-Jun,/,Mar-Dec,Sept-Jun,Mar-Dec
1,Mar-Oct,/,Mar-Dec,Mar-Oct,Mar-Dec
2,Mar-Sept,/,Sept-Mar,Mar-Sept,Sept-Mar
3,Mar-Jun,/,Sept-Dec,Mar-Jun,Sept-Dec
4,Apr-Aug,/,Oct-Feb,Apr-Aug,Oct-Feb


In [12]:
n_months = months_db['North_Months'].str.split('-', n=-1, expand=True)
s_months = months_db['South_Months'].str.split('-', n=-1, expand=True)

In [13]:
months = set(n_months[0])
months |= set(n_months[1])
months |= set(s_months[0])
months |= set(s_months[1])

In [14]:
print(months)

{None, 'May', 'Dec', 'Aug', 'Apr', 'March', 'Sept', 'Mar', 'June', 'Jan', 'Nov', 'Feb', 'July', 'All', 'Jul', 'Jun', 'Oct'}


In [15]:
months_dict = {'March':3, 'Feb':2, 
               'June':6, 'Apr':4, 
               'Sept':9, 'Dec':12, 
               'July':7, 'Mar':3, 
               'Nov':11, 'All':0, 
               'Oct':10, 'Jul':7, 
               'May':5, 'Jan':1, 
               'Aug':8, 'Jun':6, 
               None:0}

In [16]:
n_months['n_start'] = n_months[0].map(months_dict)
n_months['n_end'] = n_months[1].map(months_dict)

s_months['s_start'] = s_months[0].map(months_dict)
s_months['s_end'] = s_months[1].map(months_dict)

In [17]:
def month_range(months):
    s, e = int(months[0]), int(months[1])
    if s == 0:
        return np.arange(1, 13)
    if e == 0:
        return np.arange(s, s+1)
    if s <= e:
        return np.arange(s, e+1)
    else:
        return np.append(np.arange(1, e+1), np.arange(s, 13)) 

In [18]:
full_db['N_Months'] = n_months[['n_start', 'n_end']].values.tolist()
full_db['N_Months'] = full_db['N_Months'].apply(month_range)

full_db['S_Months'] = s_months[['s_start', 's_end']].values.tolist()
full_db['S_Months'] = full_db['S_Months'].apply(month_range)

In [19]:
full_db.drop(columns=['Months'], axis=1, inplace=True)

In [20]:
full_db.head()

Unnamed: 0,Name,Price,Location,Time,Category,North_Months,South_Months,N_Months,S_Months
0,Common Butterfly,160,Flowers,4am-7pm,Bugs,Sept-Jun,Mar-Dec,"[1, 2, 3, 4, 5, 6, 9, 10, 11, 12]","[3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
1,Yellow Butterly,160,Flying,4am-7pm,Bugs,Mar-Oct,Mar-Dec,"[3, 4, 5, 6, 7, 8, 9, 10]","[3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
2,Tiger Butterfly,240,Flying,4am-7pm,Bugs,Mar-Sept,Sept-Mar,"[3, 4, 5, 6, 7, 8, 9]","[1, 2, 3, 9, 10, 11, 12]"
3,Peacock Butterfly,2500,Rare flowers,4am-7pm,Bugs,Mar-Jun,Sept-Dec,"[3, 4, 5, 6]","[9, 10, 11, 12]"
4,Common Bluebottle,300,Flying,4am-7pm,Bugs,Apr-Aug,Oct-Feb,"[4, 5, 6, 7, 8]","[1, 2, 10, 11, 12]"


In [21]:
north_db = full_db.drop(columns=['South_Months'], axis=1)

In [22]:
south_db = full_db.drop(columns=['North_Months'], axis=1)

In [23]:
mlb = MultiLabelBinarizer()

In [24]:
north_db = north_db.join(pd.DataFrame(mlb.fit_transform(north_db['N_Months']), columns=mlb.classes_))

north_db.rename(columns={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 
                         5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 
                         9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}, 
                inplace=True)

north_db.drop(columns=['North_Months', 'N_Months', 'S_Months'], 
              axis=1, 
              inplace=True)

In [25]:
south_db = south_db.join(pd.DataFrame(mlb.fit_transform(south_db['S_Months']), columns=mlb.classes_))

south_db.rename(columns={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 
                         5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 
                         9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}, 
                inplace=True)

south_db.drop(columns=['South_Months', 'S_Months', 'N_Months'], 
              axis=1, 
              inplace=True)

In [26]:
north_db.head()

Unnamed: 0,Name,Price,Location,Time,Category,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,Common Butterfly,160,Flowers,4am-7pm,Bugs,1,1,1,1,1,1,0,0,1,1,1,1
0,Bitterling,900,River,All,Fish,1,1,1,1,1,1,0,0,1,1,1,1
1,Yellow Butterly,160,Flying,4am-7pm,Bugs,0,0,1,1,1,1,1,1,1,1,0,0
1,Pale Chub,200,River,9am-4pm,Fish,0,0,1,1,1,1,1,1,1,1,0,0
2,Tiger Butterfly,240,Flying,4am-7pm,Bugs,0,0,1,1,1,1,1,1,1,0,0,0


In [27]:
south_db.head()

Unnamed: 0,Name,Price,Location,Time,Category,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,Common Butterfly,160,Flowers,4am-7pm,Bugs,0,0,1,1,1,1,1,1,1,1,1,1
0,Bitterling,900,River,All,Fish,0,0,1,1,1,1,1,1,1,1,1,1
1,Yellow Butterly,160,Flying,4am-7pm,Bugs,0,0,1,1,1,1,1,1,1,1,1,1
1,Pale Chub,200,River,9am-4pm,Fish,0,0,1,1,1,1,1,1,1,1,1,1
2,Tiger Butterfly,240,Flying,4am-7pm,Bugs,1,1,1,0,0,0,0,0,1,1,1,1


In [28]:
def time_range(time):
    if time == 'All':
        return np.arange(0, 24)
    if len(time) > 10:
        temp = time.split(' / ')
        t1, t2 = temp[0], temp[1]
        return np.append(time_rge_helper(t1), time_rge_helper(t2))
    else:
        return time_rge_helper(time)
        
def time_rge_helper(time):
    temp = time.split('-')
    for t in range(len(temp)):
        if temp[t] == '12am':
            temp[t] = 0
        elif temp[t][:-2] != 12 and temp[t][-2:] == 'pm':
            temp[t] = int(temp[t][:-2]) + 12
        else:
            temp[t] = int(temp[t][:-2])
            
    t1, t2 = temp[0], temp[1]
    if t1 <= t2:
        return np.arange(t1, t2)
    else:
        return np.append(np.arange(0, t2), np.arange(t1, 24))

In [29]:
north_db['Time'] = north_db['Time'].apply(time_range)

north_db = north_db.join(pd.DataFrame(mlb.fit_transform(north_db['Time']), columns=mlb.classes_))

In [30]:
south_db['Time'] = south_db['Time'].apply(time_range)

south_db = south_db.join(pd.DataFrame(mlb.fit_transform(south_db['Time']), columns=mlb.classes_))

In [31]:
north_db.drop(columns=['Time'], axis=1, inplace=True)

In [32]:
south_db.drop(columns=['Time'], axis=1, inplace=True)

In [33]:
north_db.head()

Unnamed: 0,Name,Price,Location,Category,Jan,Feb,Mar,Apr,May,Jun,...,14,15,16,17,18,19,20,21,22,23
0,Common Butterfly,160,Flowers,Bugs,1,1,1,1,1,1,...,1,1,1,1,1,0,0,0,0,0
0,Bitterling,900,River,Fish,1,1,1,1,1,1,...,1,1,1,1,1,0,0,0,0,0
1,Yellow Butterly,160,Flying,Bugs,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
1,Pale Chub,200,River,Fish,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2,Tiger Butterfly,240,Flying,Bugs,0,0,1,1,1,1,...,1,1,1,1,1,0,0,0,0,0


In [34]:
south_db.head()

Unnamed: 0,Name,Price,Location,Category,Jan,Feb,Mar,Apr,May,Jun,...,14,15,16,17,18,19,20,21,22,23
0,Common Butterfly,160,Flowers,Bugs,0,0,1,1,1,1,...,1,1,1,1,1,0,0,0,0,0
0,Bitterling,900,River,Fish,0,0,1,1,1,1,...,1,1,1,1,1,0,0,0,0,0
1,Yellow Butterly,160,Flying,Bugs,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
1,Pale Chub,200,River,Fish,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2,Tiger Butterfly,240,Flying,Bugs,1,1,1,0,0,0,...,1,1,1,1,1,0,0,0,0,0


In [35]:
def feature_db(db, month, time):
    months={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 
            5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 
            9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
    temp = db[(db[months[month]] == 1) & (db[time] == 1)]
    temp = temp.sort_values(by=['Price'], axis=0, ascending=False)
    temp.reset_index(inplace=True)
    
    print(str(temp.shape[0]) + " specimens")
    
    return temp[['Name', 'Price', 'Location', 'Category']]

In [38]:
feature_db(north_db, 8, 6)

76 specimens


Unnamed: 0,Name,Price,Location,Category
0,Barreleye,15000,Sea,Fish
1,Dorado,15000,River,Fish
2,Stringfish,15000,Clifftop river,Fish
3,Saw Shark,12000,Sea,Fish
4,Golden Stag,12000,Trees,Bugs
5,Giraffe Stag,12000,Trees,Bugs
6,Sturgeon,10000,River mouth,Fish
7,Scarab Beetle,10000,Trees,Bugs
8,Arowana,10000,River,Fish
9,Napoleonfish,10000,Sea,Fish


In [39]:
feature_db(south_db, 8, 6)

18 specimens


Unnamed: 0,Name,Price,Location,Category
0,Barreleye,15000,Sea,Fish
1,Saw Shark,12000,Sea,Fish
2,Sturgeon,10000,River mouth,Fish
3,Tarantula,8000,Ground,Bugs
4,Mahi-mahi,6000,Pier,Fish
5,Snapping Turtle,5000,River,Fish
6,Ranchu Goldfish,4500,Pond,Fish
7,Emperor Butterfly,4000,Flying,Bugs
8,Ray,3000,Sea,Fish
9,Bitterling,900,River,Fish
