# Notebook 1 - Pobieranie i przetwarzanie danych

In [3]:
from bs4 import BeautifulSoup
import re   
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import datetime as dt
import statsmodels.api as sm

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time, os
from webdriver_manager.chrome import ChromeDriverManager

from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, Ridge, RidgeCV

%matplotlib inline

## Funkcje do scrapowania informacji o kartach

In [2]:
#pobiera nazwy kart
def get_name(soup):
    name = [link.text.strip() for link in soup.find_all('span', class_ = 'productDetailTitle')]
    return name


#pobiera z którego dodatku jest karta oraz jej rzadkość
def get_expansion_rarity_clean(soup):
    expansion_rarity_long = [link.text for link in soup.find_all('div', class_ = 'productDetailSet')]
    expansion_list = []
    rarity_list = []
    for expansion in expansion_rarity_long:
        expansion_and_rarity = expansion.strip()
        
        expansion_short = expansion_and_rarity[:-4]
        expansion_list.append(expansion_short)

        rarity = re.findall("[^()]", expansion_and_rarity[-3:])
        rarity_list.append(rarity[0])
        
    return expansion_list, rarity_list


#pobiera ceny kart
def get_price_clean(soup):
    price_long = soup.findAll("span", attrs = {"class": "stylePrice"})
    price_list = list()
    for price in price_long:
        x = price.text.strip()
        y = re.findall('\$(\d+.\d+)',x)
        conv_price = float(re.sub(",","",y[0]))
        price_list.append(conv_price)
    return price_list[::4]


#pobiera koszt zagrania karty
def get_mana_costs(soup):
    all_mana_costs_long = soup.findAll("div", attrs = {"class": "productDetailCastCost"})
    cost = []
    for card_cost in all_mana_costs_long:
        CMC_list = []
        for mana_symbol in card_cost.find_all('img'):
            CMC_list.append(re.findall('\w+', mana_symbol.get('src'))[3][-1])
        CMC_str=''
        cost.append(CMC_str.join(CMC_list))
    return cost


#wywołuje powyższe i konwertuje na ogolny koszt (powyższe daje koszt w konkretnych rodzajach many)
def get_converted_cost(soup):
    cmc = []

    for card_cost in get_mana_costs(soup):
        card_cmc=0
        for letter in card_cost:
            if letter.isdigit():
                card_cmc += int(letter)
            else: 
                card_cmc+=1
        cmc.append(card_cmc)
    return cmc


#pobiera kolor karty
def get_card_color(soup):
    color = []
    for card_cost in get_mana_costs(soup):
        card_color=[]
        for letter in card_cost:
            if letter != 'x':
                if not letter.isdigit():
                    card_color.append(letter)
                
        if card_color:
            color.append("".join(set(card_color)))
        else: color.append('colorless')
    return color

#pobiera zasady karty (ilosc znaków w tym tekście jest wykorzystywana jako jedna z cech karty)
def get_rules_text(soup):
    all_rules_text_long = soup.findAll("tr", attrs = {"class": "detailFlavortext"})
    
    rules_text = []
    for card_rules in all_rules_text_long:
        rules_text.append(card_rules.td.text.strip().replace('\n', ' '))
    
    return rules_text


#wywołuje wszystko powyżej po kolei
def all_scrape_modern():
    driver = webdriver.Chrome(ChromeDriverManager().install())
    base_url = 'https://www.cardkingdom.com/catalog/view?filter%5Bsort%5D=most_popular&filter%5Bsearch%5D=mtg_advanced&filter%5Btab%5D=&filter%5Bname%5D=&filter%5Bcategory_id%5D=2864&filter%5Bmulti%5D%5B0%5D=1&filter%5Btype_mode%5D=any&filter%5Btype_key%5D=&filter%5Bpow1%5D=&filter%5Bpow2%5D=&filter%5Btuf1%5D=&filter%5Btuf2%5D=&filter%5Bconcast1%5D=&filter%5Bconcast2%5D=&filter%5Bprice_op%5D=&filter%5Bprice%5D=&filter%5Bkey_text1%5D=&filter%5Bmanaprod_select%5D=any&page='
    page_number = 1
    url = base_url + str(page_number)
    number_of_pages = 400
    name = []
    expansion = []
    rarity = []
    mana_cost = []
    cmc = []
    color_identity = []
    rules_text = []
    price = []
    
    for i in range(number_of_pages):
        driver.get(url)
        soup = BeautifulSoup(driver.page_source)


        name_individual = get_name(soup)
        name.extend(name_individual)
        
        expansion_individual, rarity_individual = get_expansion_rarity_clean(soup)
        expansion.extend(expansion_individual)
        rarity.extend(rarity_individual)
        
        mana_cost_individual = get_mana_costs(soup)
        mana_cost.extend(mana_cost_individual)
        
        cmc_individual = get_converted_cost(soup)
        cmc.extend(cmc_individual)
        
        color_identity_individual = get_card_color(soup)
        color_identity.extend(color_identity_individual)
        
        rules_text_individual = get_rules_text(soup)
        rules_text.extend(rules_text_individual)
        
        price_individual = get_price_clean(soup)
        price.extend(price_individual)
        
        page_number += 1
        url = base_url + str(page_number)
        del soup
    
    return name, expansion, rarity, mana_cost, cmc, color_identity, rules_text, price


#pobiera (tym razem z wikipedii) nazwy dodatkow i daty ich wypuszczenia
def get_expansion_names_dates(soup):
    expansion_names = []
    set_dates = []

    for set_ in soup.find_all('table', class_ = 'wikitable')[1].find_all('tr')[3:115]:
        if not set_.td.get('colspan'):
            print(set_.find_all('td')[0].text)
            expansion_names.append(set_.find_all('td')[0].text.strip())

            print(set_.find_all('td')[6].text)
            set_dates.append(set_.find_all('td')[6].text)

            print('\n')
    return expansion_names, set_dates


#usuwa nawiasy z dat wypuszczenia dodatków
def remove_bracket_rel_dates(set_dates):
    set_dates_new = []
    for i in set_dates:
        if '[' in i:
            if i[-5] == '[':
                i = i[:-5]
                set_dates_new.append(i)
            else:
                i=i[:-6]
                set_dates_new.append(i)
        else: set_dates_new.append(i)        
    set_dates_new[51] = set_dates_new[51][:-5]
    return set_dates_new


#analogicznie do get_expansion_names_dates dla zestawów "Core"
def scrape_core_sets(soup):
    core_expansion_names = []
    core_set_dates = []

    for set_ in soup.find_all('table', class_ = 'wikitable')[0].find_all('tr')[2:100]:
        if not set_.td.get('colspan'):
            try: 
                if set_.find('a').text not in core_expansion_names:
                    print(set_.find('a').text)
                    core_expansion_names.append(set_.find('a').text.strip())

                print(set_.find_all('td')[4].text)
                core_set_dates.append(set_.find_all('td')[4].text)
            except: continue
    return core_expansion_names, core_set_dates


#analogicznie do remove_bracket_rel_dates
def remove_brackets_core_set_list(core_set_dates):
    core_set_dates_new = []
    for i in core_set_dates:
        if '[' in i:
            if i[-5] == '[': 
                i = i[:-5]
                core_set_dates_new.append(i)
            else:  
                i=i[:-4]
                core_set_dates_new.append(i)
        else: core_set_dates_new.append(i)        

    return core_set_dates_new


#analogicznie do get_expansion_names_dates dla uzupełniających zestawów
def scrape_reprint_comp_sets(soup):
    supp_expansion_names = []
    supp_set_dates = []


    for set_ in soup.find_all('table', class_ = 'wikitable')[4].find_all('tr')[2:100]: 
        if not set_.td.get('colspan'): 
            print(set_.find_all('td')[0].text)
            supp_expansion_names.append(set_.find_all('td')[0].text.strip())

            print(set_.find_all('td')[3].text)
            supp_set_dates.append(set_.find_all('td')[3].text.strip())

            print('\n')
    return supp_expansion_names, supp_set_dates


#analogicznie do remove_bracket_rel_dates
def remove_brackets_supp_sets(supp_set_dates):
    supp_set_dates_new = []
    for i in supp_set_dates:
        if '[' in i:
            i=i[:-5]
            supp_set_dates_new.append(i)
        else: supp_set_dates_new.append(i)        
    return supp_set_dates


#zamienia dni od wydania na date
def get_days(x):
    for i in modern_df['time_since_release']:
        try: 
            return x.days
        except: 
            return np.nan
        
def get_rarity_num(x):
    if (x == 'C'):
        return 1
    if (x == 'U'):
        return 2
    if (x == 'R'):
        return 3
    if (x == 'M'):
        return 4
        

# Scraping przy użyciu funkcji pomocniczych

In [3]:
name, expansion, rarity, mana_cost, cmc, color_identity, rules_text, price = all_scrape_modern()



In [4]:
data_tuples_modern = list(zip(name, expansion, rarity, mana_cost, cmc, color_identity, rules_text, price))

modern_df = pd.DataFrame(data_tuples_modern, columns = ['name','expansion', 'rarity', 'cost', 'cmc', 'color_identity', 'rules_text', 'price'])

modern_df

Unnamed: 0,name,expansion,rarity,cost,cmc,color_identity,rules_text,price
0,Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.00
1,Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99
2,Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99
3,Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99
4,Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99
...,...,...,...,...,...,...,...,...
9995,Open the Vaults,Commander 2015,R,4ww,6,w,Return all artifact and enchantment cards from...,2.79
9996,Swamp (C),Unlimited,L,,0,colorless,,2.79
9997,Demon of Dark Schemes,Kaladesh,M,3bbb,6,b,Flying When Demon of Dark Schemes enters the b...,1.99
9998,Curse of Unbinding (Extended Art),Innistrad: Midnight Hunt Commander Decks Variants,R,6u,7,u,Enchant player At the beginning of enchanted p...,1.99


In [5]:
modern_df.shape

(10000, 8)

In [6]:
#backup
filename = 'modern_data_baseline_PICKLE'
outfile = open(filename,'wb')
pickle.dump(modern_df,outfile)
outfile.close()

### Usuwam specjalne rzadkości i podstawowe karty krain

In [81]:
modern_df = pd.read_pickle('modern_data_baseline_PICKLE')

mask = (modern_df['rarity'] != 'L') & (modern_df['rarity'] != 'S')

modern_df = modern_df[mask]

modern_df.shape

(9155, 8)

### Usuwam Extended Art, Borderless, Showcase, Godzilla Series

In [82]:
ex_art_mask = ~modern_df['name'].str.contains("Extended Art")

modern_df = modern_df[ex_art_mask] 

borderless_mask = ~modern_df['name'].str.contains("Borderless")

modern_df = modern_df[borderless_mask] 

showcase_mask = ~modern_df['name'].str.contains("Showcase")

modern_df = modern_df[showcase_mask]

godzilla_mask = ~modern_df['name'].str.contains("Godzilla")

modern_df = modern_df[godzilla_mask]

modern_df.shape

(8381, 8)

### Zmieniam indeks na kolumnę z nazwą

In [83]:
modern_df.set_index('name', drop=True, inplace = True)


modern_df.head()

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.0
Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99
Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99
Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99
Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99


In [84]:
#fix do color identity
modern_df['color_identity'].replace('n', 'colorless', inplace=True)
modern_df['color_identity'].replace('e', 'colorless', inplace=True)

modern_df.head()

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.0
Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99
Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99
Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99
Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99


## Kodowanie koloru do 5 kolumn

In [85]:
is_w = []
is_u = []
is_b = []
is_r = []
is_g = []

for card_color_identity in modern_df['color_identity']:
    if card_color_identity != 'colorless':
        if 'w' in card_color_identity:
            is_w.append(1)
        else: 
            is_w.append(0)

        if 'u' in card_color_identity:
            is_u.append(1)
        else: 
            is_u.append(0)

        if 'b' in card_color_identity:
            is_b.append(1)
        else: 
            is_b.append(0)

        if 'r' in card_color_identity:
            is_r.append(1)
        else: 
            is_r.append(0)

        if 'g' in card_color_identity:
            is_g.append(1)
        else: 
            is_g.append(0)
    else: 
        is_w.append(0)
        is_u.append(0)
        is_b.append(0)
        is_r.append(0)
        is_g.append(0)
        

data_tuples = list(zip(is_w, is_u, is_b, is_r, is_g))
color_var_df = pd.DataFrame(data_tuples, columns = ['is_w', 'is_u', 'is_b', 'is_r', 'is_g'], index=modern_df.index)

color_var_df.head()

Unnamed: 0_level_0,is_w,is_u,is_b,is_r,is_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bazaar of Baghdad,0,0,0,0,0
Volcanic Island,0,0,0,0,0
Tundra,0,0,0,0,0
Underground Sea,0,0,0,0,0
Mox Diamond,0,0,0,0,0


In [86]:
modern_df = pd.concat([modern_df, color_var_df], axis=1, sort=False)

modern_df.head()

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price,is_w,is_u,is_b,is_r,is_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.0,0,0,0,0,0
Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0
Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99,0,0,0,0,0
Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0
Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99,0,0,0,0,0


### Koduję rzadkość karty w formie liczby 1-4

In [87]:
modern_df['rarity_num'] = modern_df.rarity.apply(lambda x: get_rarity_num(x))
modern_df.head()

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price,is_w,is_u,is_b,is_r,is_g,rarity_num
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.0,0,0,0,0,0,2
Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3
Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99,0,0,0,0,0,3
Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3
Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99,0,0,0,0,0,3


### Dodaję ilość znaków w instrukcji karty 

In [88]:
rules_text_len_mod = []
for i in modern_df['rules_text']:
    rules_text_len_mod.append(len(i))
    
modern_df['rules_text_len'] = rules_text_len_mod
modern_df.head()

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price,is_w,is_u,is_b,is_r,is_g,rarity_num,rules_text_len
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.0,0,0,0,0,0,2,43
Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3,11
Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99,0,0,0,0,0,3,11
Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3,11
Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99,0,0,0,0,0,3,207


In [278]:
#backup
filename = 'modern_data_rarity_color_rules_len_coded_PICKLE'
outfile = open(filename,'wb')
pickle.dump(modern_df,outfile)
outfile.close()

In [335]:
modern_df = pd.read_pickle('modern_data_rarity_color_rules_len_coded_PICKLE')

### Dodaję ilość wydań karty

In [336]:
num_printings_dict = modern_df.index.value_counts().to_dict()
modern_df['num_printings']= modern_df.index.map(num_printings_dict)

modern_df.head()

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price,is_w,is_u,is_b,is_r,is_g,rarity_num,rules_text_len,num_printings
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.0,0,0,0,0,0,2,43,1
Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3,11,1
Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99,0,0,0,0,0,3,11,2
Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3,11,2
Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99,0,0,0,0,0,3,207,2


### Dodaję czas od wydania

In [4]:
mtg_set_wiki_url = 'https://en.wikipedia.org/wiki/List_of_Magic:_The_Gathering_sets'

response = requests.get(mtg_set_wiki_url)

response.status_code  

200

In [6]:
page = response.text
soup = BeautifulSoup(page)

### Scrapuję nazwy dodatków i daty ich wydania z Wikipedii

In [339]:
expansion_names, set_dates = get_expansion_names_dates(soup)

Arabian Nights

December 1993[31]



Antiquities

March 1994[32]



Legends

June 1994[33]



The Dark

August 1994[34]



Fallen Empires

November 1994[35]



Ice Age

June 1995[36]



Homelands[IX]

October 1995[37]



Alliances

June 10, 1996[10]



Mirage

October 7, 1996[10]



Visions

February 3, 1997[41]



Weatherlight

June 9, 1997[10]



Tempest

October 13, 1997[10]



Stronghold

March 2, 1998[10]



Exodus

June 15, 1998[10]



Urza's Saga

October 12, 1998[47]



Urza's Legacy

February 15, 1999[10]



Urza's Destiny

June 7, 1999[49]



Mercadian Masques

October 4, 1999[51]



Nemesis

February 14, 2000[53]



Prophecy

June 5, 2000[54]



Invasion

October 2, 2000[55]



Planeshift

February 5, 2001[56]



Apocalypse

June 4, 2001



Odyssey

October 1, 2001



Torment

February 4, 2002[58]



Judgment

May 27, 2002[59]



Onslaught

October 7, 2002[60]



Legions

February 3, 2003



Scourge

May 26, 2003[62]



Mirrodin

October 3, 2003[63]



Darksteel

February 6,

In [340]:
print(len(expansion_names), len(set_dates))

85 85


In [341]:
# usuwam [] z dat

set_dates_new = remove_bracket_rel_dates(set_dates)
len(set_dates_new)

85

### Tworzę df z danymi z Wikipedii

In [342]:
sets_and_rel_dates = pd.DataFrame(list(zip(expansion_names, set_dates_new)), columns = ['expansion', 'rel_date'])

sets_and_rel_dates.head()

Unnamed: 0,expansion,rel_date
0,Arabian Nights,December 1993
1,Antiquities,March 1994
2,Legends,June 1994
3,The Dark,August 1994
4,Fallen Empires,November 1994


###  Analogicznie dla core setów

In [343]:
core_expansion_names, core_set_dates = scrape_core_sets(soup)

Limited Edition Alpha
August 5, 1993[6]

Limited Edition Beta
October 1993[7]

Unlimited Edition
December 1993[6]

Revised Edition
April 1994[8]

Fourth Edition
April 1995[9]

Fifth Edition
March 24, 1997[10]

Classic Sixth Edition
April 28, 1999[10]

Seventh Edition
April 11, 2001[10]

Eighth Edition
July 28, 2003[11]

Ninth Edition
July 29, 2005[12]

Tenth Edition
July 13, 2007[13]

Magic 2010
July 17, 2009[15]

Magic 2011
July 16, 2010[16]

Magic 2012
July 15, 2011[17]

Magic 2013
July 13, 2012[18]

Magic 2014
July 19, 2013[19]

Magic 2015
July 18, 2014[20]

Magic Origins
July 17, 2015[22]

Core Set 2019
July 13, 2018[23]

Core Set 2020
July 12, 2019[24]

Core Set 2021
July 3, 2020[25]



In [344]:
core_set_dates_new = remove_brackets_core_set_list(core_set_dates)
core_sets_and_rel_dates = pd.DataFrame(list(zip(core_expansion_names, core_set_dates_new)), columns = ['expansion', 'rel_date'])
core_sets_and_rel_dates.head()

Unnamed: 0,expansion,rel_date
0,Limited Edition Alpha,"August 5, 1993"
1,Limited Edition Beta,October 1993
2,Unlimited Edition,December 1993
3,Revised Edition,April 1994
4,Fourth Edition,April 1995


### Analogicznie dla zestawów kompilacyjnych i z reprintami

In [345]:
supp_expansion_names, supp_set_dates = scrape_reprint_comp_sets(soup)

Chronicles[XVII]

July 1995[163]



Rivals Quick Start Set

July 1996[164]



Multiverse Gift Box

November 1996[165]



Anthologies

November 1998[166]



Battle Royale Box Set

November 12, 1999[167]



Beatdown Box Set

December 2000[168]



Deckmasters: Garfield vs. Finkel

September 17, 2001[169]



Premium Foil Booster

January 8, 2010[170]



Duels of the Planeswalkers (decks)

June 4, 2010[171]



Modern Event Deck

May 30, 2014[172]



Mystery  Booster

March 13, 2020[173]



Time Spiral Remastered

March 19, 2021



Dominaria Remastered

January 13, 2023



Renaissance (French/German)

August 1995[163]



Rinascimento (Italian)

August 1995[163]



Duel Decks: Elves vs. Goblins

November 16, 2007[174]



Duel Decks: Jace vs. Chandra

November 7, 2008[175]



Duel Decks: Divine vs. Demonic

April 10, 2009[176]



Duel Decks: Garruk vs. Liliana

October 30, 2009[177]



Duel Decks: Phyrexia vs. the Coalition

March 19, 2010[178]



Duel Decks: Elspeth vs. Tezzeret

September 3,

In [346]:
supp_set_dates_new = remove_brackets_supp_sets(supp_set_dates)
supp_sets_and_rel_dates = pd.DataFrame(list(zip(supp_expansion_names, supp_set_dates_new)), columns = ['expansion', 'rel_date'])
supp_sets_and_rel_dates.head()

Unnamed: 0,expansion,rel_date
0,Chronicles[XVII],July 1995[163]
1,Rivals Quick Start Set,July 1996[164]
2,Multiverse Gift Box,November 1996[165]
3,Anthologies,November 1998[166]
4,Battle Royale Box Set,"November 12, 1999[167]"


In [347]:
sets_and_rel_dates = sets_and_rel_dates.append(core_sets_and_rel_dates, ignore_index = True)
sets_and_rel_dates = sets_and_rel_dates.append(supp_sets_and_rel_dates, ignore_index = True)

sets_and_rel_dates.tail()

Unnamed: 0,expansion,rel_date
183,2014 Holiday Gift Box,"November 14, 2014[225]"
184,2015 Holiday Gift Box,"November 6, 2015[226]"
185,Shadows over Innistrad: The Gift Box,"May 13, 2016[227]"
186,Kaladesh: The Gift Box,
187,Throne of Eldraine Bundle Gift Edition [228],"November 15, 2019"


In [348]:
sets_and_rel_dates = sets_and_rel_dates.append({'expansion' : 'Modern Horizons' , 'rel_date' : 'June 14, 2019'} , ignore_index=True)

In [349]:
sets_and_rel_dates['expansion'][65:90]

65                Fate Reforged
66            Dragons of Tarkir
67          Battle for Zendikar
68        Oath of the Gatewatch
69       Shadows over Innistrad
70                Eldritch Moon
71                     Kaladesh
72                Aether Revolt
73                     Amonkhet
74          Hour of Devastation
75                       Ixalan
76             Rivals of Ixalan
77                    Dominaria
78            Guilds of Ravnica
79           Ravnica Allegiance
80        War of the Spark[132]
81           Throne of Eldraine
82          Theros Beyond Death
83    Ikoria: Lair of Behemoths
84              Zendikar Rising
85        Limited Edition Alpha
86         Limited Edition Beta
87            Unlimited Edition
88              Revised Edition
89               Fourth Edition
Name: expansion, dtype: object

### Manualne zmiany w formatowaniu

In [350]:
good = ['2010 Core Set', '2011 Core Set','2012 Core Set', '2013 Core Set', '2014 Core Set', '2015 Core Set']
bad = ['Magic 2010', 'Magic 2011', 'Magic 2012', 'Magic 2013', 'Magic 2014', 'Magic 2015',]

for i, j in zip(bad, good):
    sets_and_rel_dates= sets_and_rel_dates.replace(i,j)
    
bad2= ['Shadows over Innistrad','Modern Masters 2015 Edition','Fourth Edition','Fifth Edition','Classic Sixth Edition','Seventh Edition', 'Eighth Edition','Ninth Edition', 'Tenth Edition', 'Ravnica: City of Guilds', 'Coldsnap[IX]']

good2=['Shadows Over Innistrad','Modern Masters 2015','4th Edition','5th Edition','6th Edition','7th Edition','8th Edition','9th Edition','10th Edition', 'Ravnica', 'Coldsnap']

for i, j in zip(bad2, good2):
    sets_and_rel_dates= sets_and_rel_dates.replace(i,j)
    
good3 = ['War of the Spark', '3rd Edition','Theros: Beyond Death', 'Alpha', 'Beta', 'Unlimited']
bad3 = ['War of the Spark[132]','Revised Edition','Theros Beyond Death','Limited Edition Alpha','Limited Edition Beta','Unlimited Edition']

for i, j in zip(bad3, good3):
    sets_and_rel_dates= sets_and_rel_dates.replace(i,j)

Masa manualnego dodawania setów przez różnice między nazwami na card kingdom i wikipedii

In [351]:
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Modern Horizons 2','June 18, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Dominaria United','September 09, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Kamigawa: Neon Dynasty','February 18, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Phyrexia: All Will Be One','February 10, 2023']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander Legends','November 20, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Mystery Booster/The List','March 13, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Streets of New Capenna','April 29, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander Legends: Battle for Baldur\'s Gate','June 10, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Theros Beyond Death','January 17, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Adventures in the Forgotten Realms Commander Decks','July 23, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Adventures in the Forgotten Realms','July 23, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Battlebond','June 8, 2018']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Collectors Ed','December 01, 1993']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Collectors Ed Intl','December 01, 1993']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander','June 17, 2011']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2013','November 1, 2013']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2014','November 7, 2014']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2015','November 13, 2015']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2016','November 11, 2016']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2017','August 25, 2017']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2018','August 10, 2018']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2019','August 23, 2019']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2020','May 15, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander 2021','April 23, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander\'s Arsenal','November 2, 2012']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander Anthology Vol. II','June 8, 2018']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander Anthology','June 9, 2017']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Commander Legends Variants','November 20, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Conspiracy - Take the Crown','August 26, 2016']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Conspiracy','June 6, 2014']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Dominaria Remastered Variants','January 13, 2023']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Dominaria United Commander Decks','September 09, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Double Masters','August 7, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Double Masters 2022','July 8, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Double Masters 2022 Variants','July 8, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Double Masters Box Toppers','August 7, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Eternal Masters','June 10, 2016']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Iconic Masters','November 17, 2017']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Innistrad: Crimson Vow','November 19, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Innistrad: Crimson Vow Commander Decks ','November 19, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Innistrad: Double Feature','January 28, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Innistrad: Midnight Hunt','September 24, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Innistrad: Midnight Hunt Commander Decks','September 24, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Innistrad: Midnight Hunt Variants','September 24, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Jumpstart','July 17, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Jumpstart 2022','December 2, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Kaldheim','February 5, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Kaldheim Commander Decks','February 2, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Kamigawa: Neon Dynasty Commander Decks','February 18, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Masterpiece Series: Mythic Edition','October 03, 2018']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Masters 25','March 16, 2018']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Modern Horizons - Retro Frames','June 14, 2019']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Modern Horizons 2 Variants','June 18, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Modern Masters','June 7, 2013']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Modern Masters 2015','May 22, 2015']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Modern Masters 2017','March 17, 2017']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Portal','June 01, 1997']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Portal 3K','May 01, 1999']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Portal II','June 01, 1998']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Promo Pack','January 01, 2000']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Promotional','January 01, 2000']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Secret Lair','January 01, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Starter Commander Decks','July 01, 2000']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Streets of New Capenna Commander Decks','April 29, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Strixhaven Mystical Archive','April 23, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Strixhaven Mystical Archive JPN','April 23, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Strixhaven: School of Mages','April 23, 2021']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['The Brothers\' War','November 18, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['The Brothers\' War Commander Decks','November 18, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['The Brothers\' War Retro Artifacts','November 18, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Ultimate Masters','December 7, 2018']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Universes Beyond: Warhammer 40,000','October 7, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Universes Beyond: Transformers','November 18, 2022']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['World Championships','January 01, 1998']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Zendikar Rising Commander Decks','September 25, 2020']
sets_and_rel_dates.loc[len(sets_and_rel_dates.index)] = ['Zendikar Rising Expeditions','September 25, 2020']

In [352]:
sets_and_rel_dates.loc[sets_and_rel_dates['expansion']=='Rise of the Eldrazi', ['rel_date']]='April 23, 2010'
sets_and_rel_dates.loc[sets_and_rel_dates['expansion']=='From the Vault: Relics', ['rel_date']]='August 27, 2010'

In [353]:
sets_and_rel_dates['rel_date'].replace('N/A', np.nan)

0           December 1993
1              March 1994
2               June 1994
3             August 1994
4           November 1994
              ...        
260       October 7, 2022
261     November 18, 2022
262      January 01, 1998
263    September 25, 2020
264    September 25, 2020
Name: rel_date, Length: 265, dtype: object

In [354]:
sets_and_rel_dates['rel_date'] = sets_and_rel_dates['rel_date'].apply(lambda x:x.strip())

sets_and_rel_dates['rel_date'][22]

'June 4, 2001'

In [355]:
sets_and_rel_dates['rel_date'][54] = sets_and_rel_dates['rel_date'][54].replace('[98]', '')

### Formatowanie dat

In [356]:
dates_format_correct = []
for date_ in sets_and_rel_dates['rel_date']:
    try:
        dates_format_correct.append(pd.to_datetime(date_))
    except: dates_format_correct.append(np.nan)

dates_format_correct

[Timestamp('1993-12-01 00:00:00'),
 Timestamp('1994-03-01 00:00:00'),
 Timestamp('1994-06-01 00:00:00'),
 Timestamp('1994-08-01 00:00:00'),
 Timestamp('1994-11-01 00:00:00'),
 Timestamp('1995-06-01 00:00:00'),
 Timestamp('1995-10-01 00:00:00'),
 Timestamp('1996-06-10 00:00:00'),
 Timestamp('1996-10-07 00:00:00'),
 Timestamp('1997-02-03 00:00:00'),
 Timestamp('1997-06-09 00:00:00'),
 Timestamp('1997-10-13 00:00:00'),
 Timestamp('1998-03-02 00:00:00'),
 Timestamp('1998-06-15 00:00:00'),
 Timestamp('1998-10-12 00:00:00'),
 Timestamp('1999-02-15 00:00:00'),
 Timestamp('1999-06-07 00:00:00'),
 Timestamp('1999-10-04 00:00:00'),
 Timestamp('2000-02-14 00:00:00'),
 Timestamp('2000-06-05 00:00:00'),
 Timestamp('2000-10-02 00:00:00'),
 Timestamp('2001-02-05 00:00:00'),
 Timestamp('2001-06-04 00:00:00'),
 Timestamp('2001-10-01 00:00:00'),
 Timestamp('2002-02-04 00:00:00'),
 Timestamp('2002-05-27 00:00:00'),
 Timestamp('2002-10-07 00:00:00'),
 Timestamp('2003-02-03 00:00:00'),
 Timestamp('2003-05-

In [357]:
sets_and_rel_dates['rel_date_correct'] = dates_format_correct

In [358]:
expansion_date_dict = pd.Series(sets_and_rel_dates['rel_date_correct'].values,index=sets_and_rel_dates['expansion']).to_dict()
modern_df['rel_date'] = modern_df['expansion'].map(expansion_date_dict)
modern_df.head()

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price,is_w,is_u,is_b,is_r,is_g,rarity_num,rules_text_len,num_printings,rel_date
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.0,0,0,0,0,0,2,43,1,1993-12-01
Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3,11,1,1994-04-01
Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99,0,0,0,0,0,3,11,2,1994-04-01
Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3,11,2,1994-04-01
Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99,0,0,0,0,0,3,207,2,1998-03-02


In [359]:
mask = modern_df["rel_date"].isna()
df = modern_df[mask]
df = df.drop_duplicates('expansion')
df

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price,is_w,is_u,is_b,is_r,is_g,rarity_num,rules_text_len,num_printings,rel_date
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
"Sheoldred, the Apocalypse (Phyrexian)",Dominaria United Variants,M,2bb,4,b,"Deathtouch Whenever you draw a card, you gain ...",74.99,0,0,1,0,0,4,106,1,NaT
Ashnod's Altar,Chronicles,C,3,3,colorless,Sacrifice a creature: Add .,8.99,0,0,0,0,0,1,27,7,NaT
Reliquary Tower (Bring-a-Friend Foil),Promotional\n,M,,0,colorless,You have no maximum hand size. : Add .,5.99,0,0,0,0,0,4,38,1,NaT
Saw in Half,Unfinity,R,2b,3,b,Destroy target creature. If that creature dies...,7.49,0,0,1,0,0,3,258,1,NaT
Sylvan Library,Commander Collection: Green,R,1g,2,g,"At the beginning of your draw step, you may dr...",42.99,0,0,0,0,1,3,204,5,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Village Rites,Strixhaven Mystical Archive\n,U,b,1,b,"As an additional cost to cast this spell, sacr...",0.69,0,0,1,0,0,2,79,3,NaT
Stonybrook Banneret,Morningtide\n,C,1u,2,u,Islandwalk Merfolk spells and Wizard spells yo...,1.29,0,1,0,0,0,1,72,1,NaT
Caustic Caterpillar,Magic Origins\n,C,g,1,g,", Sacrifice Caustic Caterpillar: Destroy targe...",0.69,0,0,0,0,1,1,72,1,NaT
Retreat to Hagra,Battle for Zendikar\n,U,2b,3,b,Landfall - Whenever a land enters the battlefi...,0.59,0,0,1,0,0,2,199,1,NaT


### Dodawanie kolumny time since release

In [360]:
modern_df['time_since_release'] = pd.to_datetime('today') - modern_df['rel_date']
type(modern_df.time_since_release)

modern_df.time_since_release[0]

Timedelta('10644 days 23:03:32.497444')

In [361]:
modern_df['time_since_release'] = modern_df['time_since_release'].apply(lambda x: get_days(x))
modern_df['time_since_release']

name
Bazaar of Baghdad                10644.0
Volcanic Island                  10523.0
Tundra                           10523.0
Underground Sea                  10523.0
Mox Diamond                       9092.0
                                  ...   
Goblin Bombardment                   NaN
Domri, Anarch of Bolas (Foil)     1117.0
Open the Vaults                   2627.0
Demon of Dark Schemes             2305.0
Clone Legion                      2858.0
Name: time_since_release, Length: 8381, dtype: float64

In [362]:
modern_df['time_since_release'].value_counts() 

8422.0    525
1117.0    484
1045.0    339
65.0      285
639.0     186
         ... 
5098.0      8
3733.0      8
3551.0      8
8266.0      7
4531.0      5
Name: time_since_release, Length: 157, dtype: int64

In [363]:
mask = modern_df["time_since_release"].isna()
modern_df[mask]

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price,is_w,is_u,is_b,is_r,is_g,rarity_num,rules_text_len,num_printings,rel_date,time_since_release
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
"Sheoldred, the Apocalypse (Phyrexian)",Dominaria United Variants,M,2bb,4,b,"Deathtouch Whenever you draw a card, you gain ...",74.99,0,0,1,0,0,4,106,1,NaT,
Ashnod's Altar,Chronicles,C,3,3,colorless,Sacrifice a creature: Add .,8.99,0,0,0,0,0,1,27,7,NaT,
Reliquary Tower (Bring-a-Friend Foil),Promotional\n,M,,0,colorless,You have no maximum hand size. : Add .,5.99,0,0,0,0,0,4,38,1,NaT,
Saw in Half,Unfinity,R,2b,3,b,Destroy target creature. If that creature dies...,7.49,0,0,1,0,0,3,258,1,NaT,
Sylvan Library,Commander Collection: Green,R,1g,2,g,"At the beginning of your draw step, you may dr...",42.99,0,0,0,0,1,3,204,5,NaT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Beacon of Tomorrows,Duel Decks: Mind Vs. Might,R,6uu,8,u,Target player takes an extra turn after this o...,3.49,0,1,0,0,0,3,103,2,NaT,
Rakdos the Defiler,Ravnica Allegiance: Guild Kits,R,2bbrr,6,br,"Flying, trample Whenever Rakdos the Defiler at...",3.49,0,0,1,1,0,3,245,2,NaT,
Searing Blaze,Duel Decks: Venser Vs. Koth,C,rr,2,r,Searing Blaze deals 1 damage to target player ...,3.49,0,0,0,1,0,1,263,2,NaT,
Goblin Offensive,Anthologies,U,x1rr,4,r,Put X 1/1 red Goblin creature tokens onto the ...,2.79,0,0,0,1,0,2,58,2,NaT,


### Usuwam NaNy

In [364]:
filename = 'modern_data_nans_PICKLE'
outfile = open(filename,'wb')
pickle.dump(modern_df,outfile)
outfile.close()

In [365]:
modern_df = pd.read_pickle('modern_data_nans_PICKLE')
modern_df.dropna(axis=0, subset=['time_since_release'], inplace=True)
modern_df.head()

Unnamed: 0_level_0,expansion,rarity,cost,cmc,color_identity,rules_text,price,is_w,is_u,is_b,is_r,is_g,rarity_num,rules_text_len,num_printings,rel_date,time_since_release
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Bazaar of Baghdad,Arabian Nights,U,,0,colorless,": Draw two cards, then discard three cards.",2999.0,0,0,0,0,0,2,43,1,1993-12-01,10644.0
Volcanic Island,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3,11,1,1994-04-01,10523.0
Tundra,3rd Edition,R,,0,colorless,: Add or .,529.99,0,0,0,0,0,3,11,2,1994-04-01,10523.0
Underground Sea,3rd Edition,R,,0,colorless,: Add or .,899.99,0,0,0,0,0,3,11,2,1994-04-01,10523.0
Mox Diamond,Stronghold,R,,0,colorless,"If Mox Diamond would enter the battlefield, yo...",749.99,0,0,0,0,0,3,207,2,1998-03-02,9092.0


In [366]:
modern_df.shape

(7947, 17)

In [367]:
filename = 'modern_df_ready_for_model_PICKLE'
outfile = open(filename,'wb')
pickle.dump(modern_df,outfile)
outfile.close()
