In [1]:
import time
import json
import pandas as pd
import numpy as np

In [2]:
# PATHs

## The directory where the dune data is stored
DUNE_DATA_PATH = '../dune_data'

## The directory where the cryptopunk data with attributes info is stored
CSV_PATH = '../cp'

## The directory where the three databases are stored
DATABASE_PATH = '../database'

## The directory where the cryptopunk images is stored
PUNK_IMG_PATH='../punk_imgs'

## The directory where the visualization data for each visualization is stored
VIS_DATA_PATH = '../vis_data'

## The directory where the scraped tweets data is stored
TWEET_PATH = '../tweets'

In [3]:
# read transaction database
tx_db = pd.read_csv('{}/tx_db.csv'.format(DATABASE_PATH), index_col=0)

# read cryptopunk database
punk_db = pd.read_csv('{}/punk_db.csv'.format(DATABASE_PATH), index_col=0)
punk_db['attributes'] = punk_db['attributes'].apply(eval)

# read trader database
trader_db = pd.read_csv('{}/trader_db.csv'.format(DATABASE_PATH), index_col=0)


### 2. Time River

<img src="../imgs/vis2_example.png" alt="drawing" width="500">

Data format

```
Index
0   datetime: week/month

Column
1   avg_price_albino_male
2   avg_price_albino_female
3   avg_price_light_male
4   avg_price_light_female
5   avg_price_medium_male
6   avg_price_medium_female
7   avg_price_dark_male
8   avg_price_dark_female
```

In [6]:
data2_punk = punk_db[['type','gender','skin_tone']]
data2_punk.reset_index()
data2_tx = tx_db[['date_time','eth_price','punk_id']]

# merge the tx data with punk data
data2 = pd.merge(data2_punk,data2_tx,on="punk_id",how="outer")
data2_clean = data2[data2.eth_price > 0]
# data2_clean['date_time'] = data2_clean.date_time(data2_clean.date_time, format='%Y-%m-%d %H:%M:%S')
data2_clean['date_time'] = pd.to_datetime(data2_clean['date_time'], errors='coerce')
data2_clean['year_month'] = data2_clean['date_time'].dt.to_period('M')

# group the data by gender, skin_tone, and year_month, and compute the mean price
# Male:
# 1
male_light = data2_clean[(data2_clean.gender == 'Male') & ((data2_clean.skin_tone == "Light")) ].groupby("year_month").agg({"eth_price": ["mean"]}).reset_index("year_month")
male_light.insert(male_light.shape[1], 'name', 'Male_Light')

#2
male_medium = data2_clean[(data2_clean.gender == 'Male') & ((data2_clean.skin_tone == "Medium")) ].groupby("year_month").agg({"eth_price": ["mean"]}).reset_index("year_month")
male_medium.insert(male_medium.shape[1], 'name', 'Male_Medium')

#3
male_dark = data2_clean[(data2_clean.gender == 'Male') & ((data2_clean.skin_tone == "Dark")) ].groupby("year_month").agg({"eth_price": ["mean"]}).reset_index("year_month")
male_dark.insert(male_dark.shape[1], 'name', 'Male_Dark')

#4
male_albino = data2_clean[(data2_clean.gender == 'Male') & ((data2_clean.skin_tone == "Albino")) ].groupby("year_month").agg({"eth_price": ["mean"]}).reset_index("year_month")
male_albino.insert(male_albino.shape[1], 'name', 'Male_Albino')

#Female
# 1
female_light = data2_clean[(data2_clean.gender == 'Female') & ((data2_clean.skin_tone == "Light")) ].groupby("year_month").agg({"eth_price": ["mean"]}).reset_index("year_month")
female_light.insert(female_light.shape[1], 'name', 'Female_Light')

#2
female_medium = data2_clean[(data2_clean.gender == 'Female') & ((data2_clean.skin_tone == "Medium")) ].groupby("year_month").agg({"eth_price": ["mean"]}).reset_index("year_month")
female_medium.insert(female_medium.shape[1], 'name', 'Female_Medium')

#3
female_dark = data2_clean[(data2_clean.gender == 'Female') & ((data2_clean.skin_tone == "Dark")) ].groupby("year_month").agg({"eth_price": ["mean"]}).reset_index("year_month")
female_dark.insert(female_dark.shape[1], 'name', 'Female_Dark')

#4
female_albino = data2_clean[(data2_clean.gender == 'Female') & ((data2_clean.skin_tone == "Albino")) ].groupby("year_month").agg({"eth_price": ["mean"]}).reset_index("year_month")
female_albino.insert(female_albino.shape[1], 'name', 'Female_Albino')

# Last dataframe merged for visualization
data2_final =  pd.concat( [male_albino, male_light, male_medium, male_dark,female_albino, female_light, female_medium, female_dark], axis=0 )
data2_final

data2_final.to_csv('{}/data2_final.csv'.format(VIS_DATA_PATH), index=False)
print('Transaction database saved to {}/data2_final.csv'.format(VIS_DATA_PATH))
data2_final


Transaction database saved to ../vis_data/data2_final.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


Unnamed: 0_level_0,year_month,eth_price,name
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,Unnamed: 3_level_1
0,2017-06,0.182600,Male_Albino
1,2017-07,0.434062,Male_Albino
2,2017-08,0.295000,Male_Albino
3,2017-09,0.194000,Male_Albino
4,2017-10,0.153333,Male_Albino
...,...,...,...
53,2022-01,77.177460,Female_Dark
54,2022-02,71.085000,Female_Dark
55,2022-03,82.339032,Female_Dark
56,2022-04,67.063000,Female_Dark


In [7]:
data2_clean

Unnamed: 0,punk_id,type,gender,skin_tone,date_time,eth_price,year_month
2,1,Human,Male,Dark,2019-04-06 05:04:51,31.000000,2019-04
23,14,Human,Female,Dark,2017-07-13 22:07:52,0.900000,2017-07
24,14,Human,Female,Dark,2017-07-17 06:07:06,1.850000,2017-07
25,14,Human,Female,Dark,2018-12-07 19:12:12,2.500000,2018-12
79,53,Human,Female,Light,2021-02-01 20:02:22,20.000000,2021-02
...,...,...,...,...,...,...,...
41551,9997,Zombie,Male,,2021-02-08 13:02:37,99.990000,2021-02
41556,9998,Human,Female,Medium,2020-12-27 00:12:55,15.000000,2020-12
41557,9998,Human,Female,Medium,2021-03-03 20:03:27,38.000000,2021-03
41558,9998,Human,Female,Medium,2021-08-11 04:08:28,110.000000,2021-08
