In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
bugs = pd.read_csv('acnh_bugs.csv')
fish = pd.read_csv('acnh_fish.csv')

In [3]:
bugs['Category'] = 'Bugs'
fish['Category'] = 'Fish'

bugs.rename(columns={'Months: North Hem/South Hem':'Months'}, 
            inplace=True)
fish.rename(columns={'Months: North Hem/South Hem':'Months'}, 
            inplace=True)

In [4]:
bugs_months = bugs['Months'].str.split(' ', 
                                       n=-1, 
                                       expand=True)

fish_months = fish['Months'].str.split(' ', 
                                       n=-1, 
                                       expand=True)

In [5]:
bugs['North_Months'] = bugs_months[0]
bugs['South_Months'] = bugs_months[2]

fish['North_Months'] = fish_months[0]
fish['South_Months'] = fish_months[2]

In [6]:
bugs.drop(columns=['Months'], 
          axis=1, 
          inplace=True)

fish.drop(columns=['Months'], 
          axis=1, 
          inplace=True)

In [7]:
bugs_n_months = bugs['North_Months'].str.split('-', 
                                               n=-1, 
                                               expand=True)
bugs_s_months = bugs['South_Months'].str.split('-', 
                                               n=-1, 
                                               expand=True)

fish_n_months = fish['North_Months'].str.split('-', 
                                               n=-1, 
                                               expand=True)
fish_s_months = fish['South_Months'].str.split('-', 
                                               n=-1, 
                                               expand=True)

In [8]:
months = set(bugs_n_months[0])
months |= set(bugs_n_months[1])
months |= set(bugs_s_months[0])
months |= set(bugs_s_months[1])
months |= set(fish_n_months[0])
months |= set(fish_n_months[1])
months |= set(fish_s_months[0])
months |= set(fish_s_months[1])

In [9]:
print(months)

{'All', None, 'Mar', 'Sept', 'Oct', 'Jun', 'Dec', 'July', 'March', 'Jul', 'Feb', 'Jan', 'Nov', 'May', 'Apr', 'Aug', 'June'}


In [10]:
months_dict = {'March':3, 'Feb':2, 
               'June':6, 'Apr':4, 
               'Sept':9, 'Dec':12, 
               'July':7, 'Mar':3, 
               'Nov':11, 'All':0, 
               'Oct':10, 'Jul':7, 
               'May':5, 'Jan':1, 
               'Aug':8, 'Jun':6, 
               None:0}

In [11]:
bugs_n_months['bugs_n_start'] = bugs_n_months[0].map(months_dict)
bugs_n_months['bugs_n_end'] = bugs_n_months[1].map(months_dict)
# bugs_s_months['bugs_s_start'] = bugs_s_months[0].map(months_dict)
# bugs_s_months['bugs_s_end'] = bugs_s_months[1].map(months_dict)

# fish_n_months['fish_n_start'] = fish_n_months[0].map(months_dict)
# fish_n_months['fish_n_end'] = fish_n_months[1].map(months_dict)
# fish_s_months['fish_s_start'] = fish_s_months[0].map(months_dict)
# fish_s_months['fish_s_end'] = fish_s_months[1].map(months_dict)

In [12]:
def month_range(months):
    s, e = int(months[0]), int(months[1])
    if s == 0:
        return np.arange(1, 13)
    if e == 0:
        return np.arange(s, s+1)
    if s <= e:
        return np.arange(s, e+1)
    else:
        return np.append(np.arange(1, e+1), np.arange(s, 13)) 

In [13]:
bugs['N_Months'] = bugs_n_months[['bugs_n_start', 'bugs_n_end']].values.tolist()
bugs['N_Months'] = bugs['N_Months'].apply(month_range)

# bugs['N_Months_Start'] = bugs_n_months['bugs_n_start']
# bugs['N_Months_End'] = bugs_n_months['bugs_n_end']
# bugs['S_Months_Start'] = bugs_s_months['bugs_s_start']
# bugs['S_Months_End'] = bugs_s_months['bugs_s_end']

# fish['N_Months_Start'] = fish_n_months['fish_n_start']
# fish['N_Months_End'] = fish_n_months['fish_n_end']
# fish['S_Months_Start'] = fish_s_months['fish_s_start']
# fish['S_Months_End'] = fish_s_months['fish_s_end']

In [14]:
bugs.head()

Unnamed: 0,Name,Price,Location,Time,Category,North_Months,South_Months,N_Months
0,Common Butterfly,160,Flowers,4am-7pm,Bugs,Sept-Jun,Mar-Dec,"[1, 2, 3, 4, 5, 6, 9, 10, 11, 12]"
1,Yellow Butterly,160,Flying,4am-7pm,Bugs,Mar-Oct,Mar-Dec,"[3, 4, 5, 6, 7, 8, 9, 10]"
2,Tiger Butterfly,240,Flying,4am-7pm,Bugs,Mar-Sept,Sept-Mar,"[3, 4, 5, 6, 7, 8, 9]"
3,Peacock Butterfly,2500,Rare flowers,4am-7pm,Bugs,Mar-Jun,Sept-Dec,"[3, 4, 5, 6]"
4,Common Bluebottle,300,Flying,4am-7pm,Bugs,Apr-Aug,Oct-Feb,"[4, 5, 6, 7, 8]"


In [16]:
acnh_db = bugs.drop(columns=['Time', 'South_Months'], axis=1)

In [17]:
mlb = MultiLabelBinarizer()

acnh_db = acnh_db.join(pd.DataFrame(mlb.fit_transform(acnh_db['N_Months']), columns=mlb.classes_))

In [22]:
acnh_db.rename(columns={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 
                        5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 
                        9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}, 
               inplace=True)

acnh_db.drop(columns=['N_Months'], axis=1)

Unnamed: 0,Name,Price,Location,Category,North_Months,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,Common Butterfly,160,Flowers,Bugs,Sept-Jun,1,1,1,1,1,1,0,0,1,1,1,1
1,Yellow Butterly,160,Flying,Bugs,Mar-Oct,0,0,1,1,1,1,1,1,1,1,0,0
2,Tiger Butterfly,240,Flying,Bugs,Mar-Sept,0,0,1,1,1,1,1,1,1,0,0,0
3,Peacock Butterfly,2500,Rare flowers,Bugs,Mar-Jun,0,0,1,1,1,1,0,0,0,0,0,0
4,Common Bluebottle,300,Flying,Bugs,Apr-Aug,0,0,0,1,1,1,1,1,0,0,0,0
5,Paper Kite Butterfly,1000,Flying,Bugs,Apr-Aug,0,0,0,1,1,1,1,1,0,0,0,0
6,Great Purple Emperor,3000,Flying,Bugs,May-Aug,0,0,0,0,1,1,1,1,0,0,0,0
7,Monarch Butterfly,140,Flying,Bugs,Sept-Nov,0,0,0,0,0,0,0,0,1,1,1,0
8,Emperor Butterfly,4000,Flying,Bugs,Jun-Mar,1,1,1,0,0,1,1,1,1,1,1,1
9,Agrias Butterfly,3000,Flying,Bugs,Apr-Sept,0,0,0,1,1,1,1,1,1,0,0,0


In [20]:
acnh_db.head()

Unnamed: 0,Name,Price,Location,Category,North_Months,N_Months,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,Common Butterfly,160,Flowers,Bugs,Sept-Jun,"[1, 2, 3, 4, 5, 6, 9, 10, 11, 12]",1,1,1,1,1,1,0,0,1,1,1,1
1,Yellow Butterly,160,Flying,Bugs,Mar-Oct,"[3, 4, 5, 6, 7, 8, 9, 10]",0,0,1,1,1,1,1,1,1,1,0,0
2,Tiger Butterfly,240,Flying,Bugs,Mar-Sept,"[3, 4, 5, 6, 7, 8, 9]",0,0,1,1,1,1,1,1,1,0,0,0
3,Peacock Butterfly,2500,Rare flowers,Bugs,Mar-Jun,"[3, 4, 5, 6]",0,0,1,1,1,1,0,0,0,0,0,0
4,Common Bluebottle,300,Flying,Bugs,Apr-Aug,"[4, 5, 6, 7, 8]",0,0,0,1,1,1,1,1,0,0,0,0
