# Web scrapping

#### Imports

In [3]:
#dataframe
import pandas as pd
import numpy as np

#selenium
import selenium
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
import time

import re

import warnings
warnings.filterwarnings('ignore')

### 1. attribute

In [59]:
#get page with attributes
opts = Options()
opts.headless = True
browser = Firefox(options=opts)
path1 = 'https://www.larvalabs.com/cryptopunks/attributes'
browser.get(path1)

In [60]:
table = browser.find_element(By.XPATH,'/html/body/div[2]/div/div[3]/div/div[2]/table')

In [61]:
columns=[]

for element in table.find_elements(By.TAG_NAME, "th"):    
    columns.append(element.text)
    
print(columns)

['Attribute', '#', 'Avail', 'Avg Sale', 'Cheapest', 'More Examples']


In [62]:
table = browser.find_element(By.XPATH,'/html/body/div[2]/div/div[3]/div/div[2]/table/tbody')

In [63]:
results=[]

for element in table.find_elements(By.TAG_NAME, "td"):    
    results.append(element.text)

In [64]:
data = {}
for i in range(6):
    data[columns[i]]=results[i::6]

df = pd.DataFrame(data)
df.drop(columns=['More Examples'], inplace=True)
df.insert(0, 'id', range(0, len(df)))
df.head()

Unnamed: 0,id,Attribute,#,Avail,Avg Sale,Cheapest
0,0,Beanie,44,11,380.50Ξ,360Ξ
1,1,Choker,48,13,152Ξ,119.95Ξ
2,2,Pilot Helmet,54,12,0,190Ξ
3,3,Tiara,55,6,116.53Ξ,160Ξ
4,4,Orange Side,68,17,125Ξ,109.99Ξ


In [65]:
df.to_csv('attribute.csv')

### 2. punk

In [6]:
header_list = ['ranking','punk_image','punk_id','minscore','2nd','category_score','att_count_score','attributes','punk_skin','punk_type','total_score']
df = pd.read_csv('ranks.csv', names=header_list)
df = df[['ranking','punk_id','attributes','punk_skin','punk_type']]
df.head(3)

Unnamed: 0,ranking,punk_id,attributes,punk_skin,punk_type
0,1,"<a href=""https://www.larvalabs.com/cryptopunks...","Buck Teeth 0.7800,Top Hat 1.1500,Big Beard 1.4...",Mid,Male
1,2,"<a href=""https://www.larvalabs.com/cryptopunks...","Cowboy Hat 1.4200,Earring 24.5900",Alien,Alien
2,3,"<a href=""https://www.larvalabs.com/cryptopunks...","Medical Mask 1.7500,Knitted Cap 4.1900,Earring...",Alien,Alien


In [8]:
#change of format
pattern='\d{1,}'
search = []    
for values in df['punk_id']:
    search.append(re.search(pattern, values).group())

df['id'] = search

In [9]:
#category
conditions = [(df['punk_type'] == 'Alien'),
              (df['punk_type'] == 'Ape'),
              (df['punk_type'] == 'Zombie'),
              (df['punk_type'] == 'Female'),
              (df['punk_type'] == 'Male')]

values = [0, 1, 2, 3, 4]
df['type'] = np.select(conditions, values)

In [10]:
#skin
conditions = [(df['punk_skin'] == 'Alien'),
              (df['punk_skin'] == 'Ape'),
              (df['punk_skin'] == 'Zombie'),
              (df['punk_skin'] == 'Albino'),
              (df['punk_skin'] == 'Dark'),
              (df['punk_skin'] == 'Light'),
              (df['punk_skin'] == 'Mid')]

values = [0,1, 2, 3, 4, 5, 6]
df['skin'] = np.select(conditions, values)

In [11]:
df[['a','b']] = df['punk_id'].str.split("\"", 1, expand=True)
df[['url','d']] = df['b'].str.split("\"", 1, expand=True)
df.drop(columns=['a','b','d','punk_id'], axis=1, inplace=True)

In [12]:
len(df['id'].unique())

10000

In [13]:
df.head(2)

Unnamed: 0,ranking,attributes,punk_skin,punk_type,id,type,skin,url
0,1,"Buck Teeth 0.7800,Top Hat 1.1500,Big Beard 1.4...",Mid,Male,8348,4,6,https://www.larvalabs.com/cryptopunks/details/...
1,2,"Cowboy Hat 1.4200,Earring 24.5900",Alien,Alien,3443,0,0,https://www.larvalabs.com/cryptopunks/details/...


In [14]:
df.to_csv('punk.csv')

### 3. punk_attribute_relation

In [22]:
#get page with ranking and punks
opts = Options()
opts.headless = True
browser = Firefox(options=opts)

In [23]:
columns=['attribute','punk_id']
print(columns)

['attribute', 'punk_id']


In [57]:
results=[]

In [58]:
for i in range(1,1001):
    num = str(i)
    path = 'https://www.larvalabs.com/cryptopunks/details/'+num
    browser.get(path)
    table = browser.find_element(By.XPATH,'//*[@id="punkDetails"]/div[2]/div/div')
    
    for element in table.find_elements(By.TAG_NAME, "a"):
        results.append(element.text)
        results.append(num)
    time.sleep(0.1)
print('done')

done


In [41]:
data = {}
for i in range(2):
    data[columns[i]]=results[i::2]

df = pd.DataFrame(data)
df.head()

Unnamed: 0,attribute,punk_id
0,Smile,1
1,Mohawk,1
2,Wild Hair,2
3,Wild Hair,3
4,Nerd Glasses,3


In [43]:
df.to_csv('punk_attributes_relation.csv')

In [17]:
df = pd.read_csv('punk_attributes_relation.csv')

In [18]:
#number of attributes
values = list(range(0, 87))
#conditions
conditions = [(df['attribute'] == 'Beanie'),(df['attribute'] == 'Choker'),
             (df['attribute'] == 'Pilot Helmet'),(df['attribute'] == 'Tiara'),
             (df['attribute'] == 'Orange Side'),(df['attribute'] == 'Buck Teeth'),
             (df['attribute'] == 'Welding Goggles'),(df['attribute'] == 'Pigtails'),
             (df['attribute'] == 'Pink With Hat'),(df['attribute'] == 'Top Hat'),
             (df['attribute'] == 'Spots'),(df['attribute'] == 'Rosy Cheeks'),
             (df['attribute'] == 'Blonde Short'),(df['attribute'] == 'Wild White Hair'),
             (df['attribute'] == 'Cowboy Hat'),(df['attribute'] == 'Wild Blonde'),
             (df['attribute'] == 'Straight Hair Blonde'),(df['attribute'] == 'Big Beard'),
             (df['attribute'] == 'Red Mohawk'),(df['attribute'] == 'Half Shaved'),
             (df['attribute'] == 'Blonde Bob'),(df['attribute'] == 'Vampire Hair'),
             (df['attribute'] == 'Clown Hair Green'),(df['attribute'] == 'Straight Hair Dark'),
             (df['attribute'] == 'Straight Hair'),(df['attribute'] == 'Silver Chain'),
             (df['attribute'] == 'Dark Hair'),(df['attribute'] == 'Purple Hair'),
             (df['attribute'] == 'Gold Chain'),(df['attribute'] == 'Medical Mask'),
             (df['attribute'] == 'Tassle Hat'),(df['attribute'] == 'Fedora'),
             (df['attribute'] == 'Police Cap'),(df['attribute'] == 'Clown Nose'),
             (df['attribute'] == 'Smile'),(df['attribute'] == 'Cap Forward'),
             (df['attribute'] == 'Hoodie'),(df['attribute'] == 'Front Beard Dark'),
             (df['attribute'] == 'Frown'),(df['attribute'] == 'Purple Eye Shadow'),   
             (df['attribute'] == 'Handlebars'),(df['attribute'] == 'Blue Eye Shadow'),
             (df['attribute'] == 'Green Eye Shadow'),(df['attribute'] == 'Vape'),
             (df['attribute'] == 'Front Beard'),(df['attribute'] == 'Chinstrap'),
             (df['attribute'] == '3D Glasses'),(df['attribute'] == 'Luxurious Beard'),
             (df['attribute'] == 'Mustache'),(df['attribute'] == 'Normal Beard Black'),
             (df['attribute'] == 'Normal Beard'),(df['attribute'] == 'Eye Mask'),
             (df['attribute'] == 'Goat'),(df['attribute'] == 'Do-rag'),
             (df['attribute'] == 'Shaved Head'),(df['attribute'] == 'Muttonchops'),
             (df['attribute'] == 'Peak Spike'),(df['attribute'] == 'Pipe'),
             (df['attribute'] == 'VR'),(df['attribute'] == 'Cap'),
             (df['attribute'] == 'Small Shades'),(df['attribute'] == 'Clown Eyes Green'),
             (df['attribute'] == 'Clown Eyes Blue'),(df['attribute'] == 'Headband'),
             (df['attribute'] == 'Crazy Hair'),(df['attribute'] == 'Knitted Cap'),
             (df['attribute'] == 'Mohawk Dark'),(df['attribute'] == 'Mohawk'),
             (df['attribute'] == 'Mohawk Thin'),(df['attribute'] == 'Frumpy Hair'),
             (df['attribute'] == 'Wild Hair'),(df['attribute'] == 'Messy Hair'),
             (df['attribute'] == 'Eye Patch'),(df['attribute'] == 'Stringy Hair'),
             (df['attribute'] == 'Bandana'),(df['attribute'] == 'Classic Shadesv'),
             (df['attribute'] == 'Shadow Beard'),(df['attribute'] == 'Regular Shades'),
             (df['attribute'] == 'Horned Rim Glasses'),(df['attribute'] == 'Big Shades'),
             (df['attribute'] == 'Nerd Glasses'),(df['attribute'] == 'Black Lipstick'),
             (df['attribute'] == 'Mole'),(df['attribute'] == 'Purple Lipstick'),
             (df['attribute'] == 'Hot Lipstick'),(df['attribute'] == 'Cigarette'),
             (df['attribute'] == 'Earring')]

df['att_id'] = np.select(conditions, values)

In [19]:
df.drop(columns=['Unnamed: 0'], axis=1, inplace=True)

In [20]:
len(df['punk_id'])

28132

In [21]:
df.drop_duplicates(subset=None, keep='first', inplace=True)

In [22]:
len(df['punk_id'])

27541

In [23]:
len(df['punk_id'].unique())

9992

In [24]:
df.loc[df['punk_id'] == 0]

Unnamed: 0,attribute,punk_id,att_id
6203,Green Eye Shadow,0,42
6204,Earring,0,86
6205,Blonde Bob,0,20
27954,Peak Spike,0,56
27956,Normal Beard,0,50


In [25]:
df.drop([27954, 27956], inplace=True)
df.loc[df['punk_id'] == 0]

Unnamed: 0,attribute,punk_id,att_id
6203,Green Eye Shadow,0,42
6204,Earring,0,86
6205,Blonde Bob,0,20


In [26]:
df.to_csv('punk_attribute_relation.csv')

### 4. transaction

In [2]:
#initiate browser
opts = Options()
opts.headless = True
browser = Firefox(options=opts)

In [5]:
results=[]

In [3]:
columns=['1','2','3','4','5','6','7','8','9','10']
print(columns)

['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']


In [None]:
#loop to fetch all transactions
for i in range(0,10000):
    num = str(i)
    path = 'https://www.larvalabs.com/cryptopunks/details/'+ num
    browser.get(path)
    table = browser.find_element(By.XPATH,'//*[@id="punkHistory"]/div/table/tbody')
    
    try:
        for element in table.find_elements(By.TAG_NAME, "td"):
            results.append(element.text)
            results.append(num)
    except:
        pass
    time.sleep(0.1) 

In [112]:
data = {}
for i in range(10):
    data[columns[i]]=results[i::10]

df = pd.DataFrame(data)
df = df.iloc[1: , :]
df.reset_index(inplace=True, drop=True)
df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
0,Sold,2000,0xc352b5,2000,peted.et…,2000,"120Ξ ($276,121)",2000,"Apr 13, 2021",2000
1,Offered,2000,,2000,,2000,"120Ξ ($259,800)",2000,"Apr 13, 2021",2000
2,Bid Withdrawn,2000,daddykal…,2000,,2000,"72.69Ξ ($156,217)",2000,"Apr 12, 2021",2000
3,Bid,2000,daddykal…,2000,,2000,"72.69Ξ ($156,126)",2000,"Apr 12, 2021",2000
4,Bid Withdrawn,2000,caff.eth,2000,,2000,"50Ξ ($86,742)",2000,"Feb 16, 2021",2000


In [29]:
df.to_csv('transaction_all_data.csv')

In [96]:
df = pd.read_csv('transaction_all_data.csv')
df.head(3)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,1,2,3,4,5,6,7,8,9,10
0,0,0,Bid,0,0xe73a1d,0,,0,321Ξ ($1.14M),0,"Sep 01, 2021",0
1,1,1,Bid Withdrawn,0,ethmania…,0,,0,320Ξ ($1.11M),0,"Sep 01, 2021",0
2,2,2,Bid,0,ethmania…,0,,0,320Ξ ($1.1M),0,"Sep 01, 2021",0


In [97]:
df.drop(columns=['Unnamed: 0','Unnamed: 0.1','2','3','4','5','6','8'], axis=1, inplace=True)
df.drop(df[df['1']=='Transfer'].index)
df = df.rename(columns={'10':'punk_id','1':'type'})
df['id'] = df.index
df.head(3)

Unnamed: 0,type,7,9,punk_id,id
0,Bid,321Ξ ($1.14M),"Sep 01, 2021",0,0
1,Bid Withdrawn,320Ξ ($1.11M),"Sep 01, 2021",0,1
2,Bid,320Ξ ($1.1M),"Sep 01, 2021",0,2


In [98]:
df.isna().sum()

type           0
7          41114
9              0
punk_id        0
id             0
dtype: int64

In [99]:
df['7'] = df['7'].fillna('0.00Ξ')
df.isna().sum()

type       0
7          0
9          0
punk_id    0
id         0
dtype: int64

In [100]:
df.reset_index(drop=True)
df[['amount','b']] = df['7'].str.split("Ξ", 1, expand=True)
df.drop(columns=['b'], axis=1, inplace=True)

In [101]:
df.isna().sum()

type       0
7          0
9          0
punk_id    0
id         0
amount     0
dtype: int64

In [102]:
#change of format
pattern='\d.{0,}'
search = []    
for values in df['amount']:
    try:
        search.append(re.search(pattern, values).group())
    except:
        search.append(values)
df['amount'] = search
df[34950:34955]

Unnamed: 0,type,7,9,punk_id,id,amount
34950,Offer Withdrawn,0.00Ξ,"Apr 16, 2021",2484,34950,0.0
34951,Bid Withdrawn,"345Ξ ($847,627)","Apr 15, 2021",2484,34951,345.0
34952,Offered,650Ξ ($1.59M),"Apr 15, 2021",2484,34952,650.0
34953,Bid,"345Ξ ($836,370)","Apr 15, 2021",2484,34953,345.0
34954,Offered,850Ξ ($2.06M),"Apr 15, 2021",2484,34954,850.0


In [103]:
len(df)

177730

In [104]:
boolean_findings1 = df['amount'].str.contains('K|M|B|T|Y')
total_occurence1 = boolean_findings1.sum()
total_occurence1

828

In [105]:
boolean_findings1

0         False
1         False
2         False
3         False
4         False
          ...  
177725    False
177726    False
177727    False
177728    False
177729    False
Name: amount, Length: 177730, dtype: bool

In [106]:
df['choice'] = boolean_findings1
df[34950:34955]

Unnamed: 0,type,7,9,punk_id,id,amount,choice
34950,Offer Withdrawn,0.00Ξ,"Apr 16, 2021",2484,34950,0.0,False
34951,Bid Withdrawn,"345Ξ ($847,627)","Apr 15, 2021",2484,34951,345.0,False
34952,Offered,650Ξ ($1.59M),"Apr 15, 2021",2484,34952,650.0,False
34953,Bid,"345Ξ ($836,370)","Apr 15, 2021",2484,34953,345.0,False
34954,Offered,850Ξ ($2.06M),"Apr 15, 2021",2484,34954,850.0,False


In [107]:
df = df[~df['amount'].str.contains("Y")]
df = df[~df['amount'].str.contains("Z")]
df = df[~df['amount'].str.contains("E")]

In [108]:
a = df.loc[df['choice'] == False]
b = df.loc[df['choice'] == True]

In [109]:
b.head(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice
1252,Offered,1.25KΞ ($3.72M),"Sep 30, 2021",155,1252,1.25K,True
2698,Offered,3.09KΞ ($10.82M),"Sep 17, 2021",309,2698,3.09K,True
2699,Offered,2.5KΞ ($8.8M),"Sep 01, 2021",309,2699,2.5K,True


In [110]:
b['amount']=b['amount'].str.replace(',','.')

In [111]:
b.head(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice
1252,Offered,1.25KΞ ($3.72M),"Sep 30, 2021",155,1252,1.25K,True
2698,Offered,3.09KΞ ($10.82M),"Sep 17, 2021",309,2698,3.09K,True
2699,Offered,2.5KΞ ($8.8M),"Sep 01, 2021",309,2699,2.5K,True


In [112]:
c = b[b['amount'].str.contains("K")]#thousand
d = b[b['amount'].str.contains("M")]#million
e = b[b['amount'].str.contains("B")]#billion
f = b[b['amount'].str.contains("T")]#trillion

In [113]:
c.tail(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice
175840,Offered,2KΞ ($3.48M),"Mar 12, 2021",8909,175840,2K,True
177357,Offered,6.97KΞ ($22.64M),"May 05, 2021",8988,177357,6.97K,True
177491,Offered,25KΞ ($106.95M),"Oct 29, 2021",8993,177491,25K,True


In [114]:
c['amount']=c['amount'].str.replace('K','')
d['amount']=d['amount'].str.replace('M','')
e['amount']=e['amount'].str.replace('B','')
f['amount']=f['amount'].str.replace('T','')

In [115]:
c.tail(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice
175840,Offered,2KΞ ($3.48M),"Mar 12, 2021",8909,175840,2.0,True
177357,Offered,6.97KΞ ($22.64M),"May 05, 2021",8988,177357,6.97,True
177491,Offered,25KΞ ($106.95M),"Oct 29, 2021",8993,177491,25.0,True


In [116]:
c['amount']=c['amount'].astype(float)
d['amount']=d['amount'].astype(float)
e['amount']=e['amount'].astype(float)
f['amount']=f['amount'].astype(float)

In [117]:
c['amount'] = c['amount'] * 1000
d['amount'] = d['amount'] * 1000000
e['amount'] = e['amount'] * 1000000000
f['amount'] = f['amount'] * 1000000000000
c.tail(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice
175840,Offered,2KΞ ($3.48M),"Mar 12, 2021",8909,175840,2000.0,True
177357,Offered,6.97KΞ ($22.64M),"May 05, 2021",8988,177357,6970.0,True
177491,Offered,25KΞ ($106.95M),"Oct 29, 2021",8993,177491,25000.0,True


In [118]:
df = pd.concat([a, c, d, e, f], ignore_index=True)
df = df.sort_values(by=['id'])
df.reset_index(drop=True)
df.tail()

Unnamed: 0,type,7,9,punk_id,id,amount,choice
176893,Bid,0.12Ξ ($77),"Apr 23, 2018",8999,177725,0.12,False
176894,Bid Withdrawn,0.12Ξ ($44),"Apr 06, 2018",8999,177726,0.12,False
176895,Bid,0.12Ξ ($45),"Apr 04, 2018",8999,177727,0.12,False
176896,Offered,"6.50Ξ ($1,406)","Jul 21, 2017",8999,177728,6.5,False
176897,Claimed,0.00Ξ,"Jun 23, 2017",8999,177729,0.0,False


In [119]:
df.isna().sum()

type       0
7          0
9          0
punk_id    0
id         0
amount     0
choice     0
dtype: int64

In [120]:
#change of format
pattern='\d{4}'
search = []    
for values in df['9']:
    try:
        search.append(re.search(pattern, values).group())
    except:
        search.append(re.search(pattern, values))
df['year'] = search
df.head(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice,year
0,Bid,321Ξ ($1.14M),"Sep 01, 2021",0,0,321,False,2021
1,Bid Withdrawn,320Ξ ($1.11M),"Sep 01, 2021",0,1,320,False,2021
2,Bid,320Ξ ($1.1M),"Sep 01, 2021",0,2,320,False,2021


In [121]:
#change of format
pattern='\d{2}'
search = []    
for values in df['9']:
    try:
        search.append(re.search(pattern, values).group())
    except:
        search.append(re.search(pattern, values))
df['day'] = search
df.head(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice,year,day
0,Bid,321Ξ ($1.14M),"Sep 01, 2021",0,0,321,False,2021,1
1,Bid Withdrawn,320Ξ ($1.11M),"Sep 01, 2021",0,1,320,False,2021,1
2,Bid,320Ξ ($1.1M),"Sep 01, 2021",0,2,320,False,2021,1


In [122]:
#change of format
pattern='\w{3}'
search = []    
for values in df['9']:
    try:
        search.append(re.search(pattern, values).group())
    except:
        search.append(re.search(pattern, values))
df['mon'] = search
df.head(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice,year,day,mon
0,Bid,321Ξ ($1.14M),"Sep 01, 2021",0,0,321,False,2021,1,Sep
1,Bid Withdrawn,320Ξ ($1.11M),"Sep 01, 2021",0,1,320,False,2021,1,Sep
2,Bid,320Ξ ($1.1M),"Sep 01, 2021",0,2,320,False,2021,1,Sep


In [123]:
#category
conditions = [(df['mon'] == 'Jan'),
              (df['mon'] == 'Feb'),
              (df['mon'] == 'Mar'),
              (df['mon'] == 'Apr'),
              (df['mon'] == 'May'),
              (df['mon'] == 'Jun'),
              (df['mon'] == 'Jul'),
              (df['mon'] == 'Aug'),
              (df['mon'] == 'Sep'),
              (df['mon'] == 'Oct'),
              (df['mon'] == 'Nov'),
              (df['mon'] == 'Dec'),]

values = ['01', '02', '03', '04', '05','06','07','08','09','10','11','12']
df['month'] = np.select(conditions, values)
df.head(5)

Unnamed: 0,type,7,9,punk_id,id,amount,choice,year,day,mon,month
0,Bid,321Ξ ($1.14M),"Sep 01, 2021",0,0,321,False,2021,1,Sep,9
1,Bid Withdrawn,320Ξ ($1.11M),"Sep 01, 2021",0,1,320,False,2021,1,Sep,9
2,Bid,320Ξ ($1.1M),"Sep 01, 2021",0,2,320,False,2021,1,Sep,9
3,Bid Withdrawn,"263Ξ ($904,481)","Sep 01, 2021",0,3,263,False,2021,1,Sep,9
4,Bid,"263Ξ ($849,714)","Aug 29, 2021",0,4,263,False,2021,29,Aug,8


In [124]:
df['date'] = df[['year', 'month', 'day']].apply(lambda x: '-'.join(x), axis=1)
df.head(3)

Unnamed: 0,type,7,9,punk_id,id,amount,choice,year,day,mon,month,date
0,Bid,321Ξ ($1.14M),"Sep 01, 2021",0,0,321,False,2021,1,Sep,9,2021-09-01
1,Bid Withdrawn,320Ξ ($1.11M),"Sep 01, 2021",0,1,320,False,2021,1,Sep,9,2021-09-01
2,Bid,320Ξ ($1.1M),"Sep 01, 2021",0,2,320,False,2021,1,Sep,9,2021-09-01


In [125]:
df.drop(columns=['7','9','year','day','mon','month','choice'], axis=1, inplace=True)
df.head(3)

Unnamed: 0,type,punk_id,id,amount,date
0,Bid,0,0,321,2021-09-01
1,Bid Withdrawn,0,1,320,2021-09-01
2,Bid,0,2,320,2021-09-01


In [126]:
df['amount'] = df['amount'].astype(float)

In [127]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 177692 entries, 0 to 176897
Data columns (total 5 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   type     177692 non-null  object 
 1   punk_id  177692 non-null  int64  
 2   id       177692 non-null  int64  
 3   amount   177692 non-null  float64
 4   date     177692 non-null  object 
dtypes: float64(1), int64(2), object(2)
memory usage: 8.1+ MB


In [128]:
df.isna().sum()

type       0
punk_id    0
id         0
amount     0
date       0
dtype: int64

In [129]:
len(df['punk_id'].unique())

10000

In [130]:
df.tail(3)

Unnamed: 0,type,punk_id,id,amount,date
176895,Bid,8999,177727,0.12,2018-04-04
176896,Offered,8999,177728,6.5,2017-07-21
176897,Claimed,8999,177729,0.0,2017-06-23


In [131]:
df_sort = df.sort_values(by=['punk_id'])
df_sort.tail(5)

Unnamed: 0,type,punk_id,id,amount,date
172946,Offered,9999,173726,16.6,2017-07-14
172947,Offer Withdrawn,9999,173727,0.0,2017-07-14
172948,Bid,9999,173728,1.0,2017-07-14
177630,Offered,9999,173730,2220.0,2017-07-14
172961,Bid,9999,173754,0.25,2017-07-03


In [132]:
#what's missing? no attributes
mylist = list(df_sort['punk_id'].unique())
l = []
def missing_num(mylist):
    for n in range(0,10000): 
        if n not in mylist: 
            l.append(n)
    if not l:
        print("Nothing is missing")
        
missing_num(mylist)

Nothing is missing


In [133]:
df.to_csv('transaction.csv')