In [17]:
import pandas as pd
from statistics import mean, median

In [111]:
# load both CBECI daily-consumption exports:
#   daily_20_21_df    -> "Old Version" file
#   daily_20_21_v2_df -> "New Version" file
daily_20_21_df, daily_20_21_v2_df = map(
    pd.read_csv,
    (
        'CBECI Old Version 2020-2021 Total Daily Energy Consumption.csv',
        'CBECI New Version 2020-2021 Total Daily Energy Consumption.csv',
    ),
)

In [112]:
daily_20_21_df # display preview of the older version of CBECI data (used in Chamanara et al)

Unnamed: 0,Timestamp,Date and Time,"power GUESS, GW","daily energy consumption GUESS, TWh"
0,1577836800,2020-01-01 0:00:00,8.185817,0.196460
1,1577923200,2020-01-02 0:00:00,8.061507,0.193476
2,1578009600,2020-01-03 0:00:00,8.322102,0.199730
3,1578096000,2020-01-04 0:00:00,8.545083,0.205082
4,1578182400,2020-01-05 0:00:00,8.683946,0.208415
...,...,...,...,...
726,1640563200,2021-12-27 0:00:00,13.662334,0.327896
727,1640649600,2021-12-28 0:00:00,13.313983,0.319536
728,1640736000,2021-12-29 0:00:00,13.280707,0.318737
729,1640822400,2021-12-30 0:00:00,12.853075,0.308474


In [113]:
daily_20_21_v2_df # display preview of the latest version of CBECI data (used for testing purposes)

Unnamed: 0,Timestamp,Date and Time,"power GUESS, GW","daily energy consumption GUESS, TWh"
0,1577836800,2020-01-01 0:00:00,8.105745,0.194538
1,1577923200,2020-01-02 0:00:00,7.954713,0.190913
2,1578009600,2020-01-03 0:00:00,8.180876,0.196341
3,1578096000,2020-01-04 0:00:00,8.369841,0.200876
4,1578182400,2020-01-05 0:00:00,8.476155,0.203428
...,...,...,...,...
726,1640563200,2021-12-27 0:00:00,11.331840,0.271964
727,1640649600,2021-12-28 0:00:00,11.042910,0.265030
728,1640736000,2021-12-29 0:00:00,11.015311,0.264367
729,1640822400,2021-12-30 0:00:00,10.660623,0.255855


In [114]:
def total_monthly_estimates(df, leap_year = 0):
    """Split one year of daily rows into twelve per-month slices.

    The split is purely positional: the first 31 rows become January, the
    next 28 (or 29) February, and so on. Nothing is summed here; callers
    add up the column they need from each slice.

    Parameters
    ----------
    df : DataFrame holding one year of daily rows.
        Assumes the first row is 1 January and rows are consecutive days;
        the function does not check dates.
    leap_year : pass 1 to give February 29 days; any other value keeps 28.

    Returns
    -------
    tuple of 12 DataFrames ordered January..December.
    """
    days_per_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]  # Jan..Dec
    if leap_year == 1:
        days_per_month[1] += 1  # February gains a day in leap years

    month_frames = []
    first_row = 0
    for n_days in days_per_month:
        end_row = first_row + n_days
        month_frames.append(df.iloc[first_row:end_row])
        first_row = end_row  # next month starts where this one stopped

    return tuple(month_frames)

def total_annual_estimate(jan, feb, mar, apr, may, jun, jul, aug, sep, oco, nov, dec):
    """Return the sum of the daily energy column over twelve monthly frames.

    Each argument is one month's rows and must expose the column
    'daily energy consumption GUESS, TWh'. Every month is summed on its
    own and the twelve monthly totals are then added, January first.
    """
    energy_col = 'daily energy consumption GUESS, TWh'
    monthly_totals = []
    for month_rows in (jan, feb, mar, apr, may, jun, jul, aug, sep, oco, nov, dec):
        monthly_totals.append(sum(month_rows[energy_col]))
    return sum(monthly_totals)

In [115]:
# choose which CBECI export (old or new version) feeds the rest of the analysis
cbeci_data_df = daily_20_21_v2_df


# 2020 is a leap year: positional rows 0-365 (366 days)
jan, feb, mar, apr, may, jun, jul, aug, sep, oco, nov, dec = total_monthly_estimates(
    cbeci_data_df.iloc[:366], leap_year=1
)

# one total per calendar month, January first
monthly_consumption_2020 = [
    sum(month_rows['daily energy consumption GUESS, TWh'])
    for month_rows in (jan, feb, mar, apr, may, jun, jul, aug, sep, oco, nov, dec)
]
annual_consumption_2020 = total_annual_estimate(jan, feb, mar, apr, may, jun, jul, aug, sep, oco, nov, dec)

# 2021: positional rows 366-730 (365 days); the month names are rebound to the 2021 slices
jan, feb, mar, apr, may, jun, jul, aug, sep, oco, nov, dec = total_monthly_estimates(
    cbeci_data_df.iloc[366:731], leap_year=0
)
monthly_consumption_2021 = [
    sum(month_rows['daily energy consumption GUESS, TWh'])
    for month_rows in (jan, feb, mar, apr, may, jun, jul, aug, sep, oco, nov, dec)
]
annual_consumption_2021 = total_annual_estimate(jan, feb, mar, apr, may, jun, jul, aug, sep, oco, nov, dec)

In [116]:
# we now have the ability to compute the total monthly and total yearly energy consumption values for bitcoin mining.
# we also verified that the above algos correctly compute these values by using the latest CBECI data and comparing 
# to data on their website (data downloaded 10/31/23 and compared on 11/1/23).

# the next step is to import the geographic data for 2020-2021. as far as I know, what is available on CBECI did not 
# change significantly after the Aug '23 update. So we assume that these data are the same as those used in Chamanara et al. (2023).

# Top 10 countries according to CBECI
# Mainland China
# Russian Federation
# Other
# Malaysia
# Kazakhstan
# United States
# Iran, Islamic Rep.
# Canada
# Germany *
# Ireland *

# First, I want to plot how the share of each country listed changed over the two-year period. Chamanara et al. do not 
# say how they determined the hashrate share in their environmental footprint calculations. I will go through and 
# compute the electricity use in TWh using every data point for each of the 10 countries in order to determine which 
# month and year they used for their analysis. In their figure they claim their footprints represent the entire 
# 2020-2021 period, which is not possible unless they took an average, and they do not claim to have done so.
# monthly_hashrate_% is a string, need to convert to a float for plotting

def percent_to_float(string):
    """Convert a percentage such as '72.69%' to the float 72.69.

    Parameters
    ----------
    string : str or number
        A string has every '%' removed before conversion. A value that is
        already numeric is passed straight to float(), so re-running the
        conversion cell on an already-converted column no longer fails
        with AttributeError.

    Returns
    -------
    float
        The numeric value; it is NOT divided by 100.

    Raises
    ------
    ValueError
        If the remaining text is not a valid float literal.
    """
    if isinstance(string, str):
        return float(string.replace('%', ''))

    # already numeric (e.g. the column was converted on an earlier run)
    return float(string)

geo_distrib_df = pd.read_csv('Geographic Distribution 2020-2021.csv')
# the share column is read as '%'-suffixed text; store it as plain floats
geo_distrib_df['monthly_hashrate_%'] = geo_distrib_df['monthly_hashrate_%'].map(percent_to_float)
geo_distrib_df

Unnamed: 0,date,country,monthly_hashrate_%,monthly_absolute_hashrate_EH/S
0,2020-01-01,Mainland China,72.69,79.41
1,2020-01-01,Russian Federation,6.05,6.60
2,2020-01-01,Other,5.56,6.08
3,2020-01-01,Malaysia,4.13,4.52
4,2020-01-01,Kazakhstan,3.49,3.81
...,...,...,...,...
235,2021-12-01,Russian Federation,4.72,8.18
236,2021-12-01,Germany *,3.06,5.31
237,2021-12-01,Malaysia,2.84,4.93
238,2021-12-01,Ireland *,2.06,3.56


In [117]:
def country_geoinfo(country_name, df):
    """Return the rows of `df` whose 'country' column equals `country_name`.

    The result is a new frame with a fresh 0..n-1 index (the old index is
    dropped); `df` itself is left untouched. An unknown name yields an
    empty frame.
    """
    is_country = df['country'] == country_name
    return df[is_country].reset_index(drop=True)

In [118]:
# one frame per entry of the 2020-2021 distribution table
# ("Other" aggregates several countries)
(
    mainland_china_df,
    russian_fed_df,
    other_df,
    malaysia_df,
    kazakhstan_df,
    us_df,
    iran_df,
    ca_df,
    germany_df,
    ireland_df,
) = (
    country_geoinfo(label, geo_distrib_df)
    for label in (
        "Mainland China",
        "Russian Federation",
        "Other",
        "Malaysia",
        "Kazakhstan",
        "United States",
        "Iran, Islamic Rep.",
        "Canada",
        "Germany *",
        "Ireland *",
    )
)

In [119]:
def yearly_energy_consumption_per_country(monthly_consumption, country_df):
    """Return one country's energy use for each of the 12 months of a year.

    Despite the name, the result is monthly: a 12-element list, January
    first, where each entry is that month's total consumption multiplied
    by the country's share for the month.

    Parameters
    ----------
    monthly_consumption : sequence of at least 12 monthly totals.
    country_df : frame with a 'monthly_hashrate_%' column in percent
        (hence the division by 100). Its index is reset first, so a slice
        such as df[12:] is read from its first row. Only the first 12 rows
        are used.
    """
    share_pct = country_df.reset_index(drop=True)['monthly_hashrate_%']
    return [
        monthly_consumption[month] * share_pct[month] / 100
        for month in range(12)
    ]

In [120]:
def _energy_by_year(country_df):
    """Return (monthly 2020 values, monthly 2021 values, two-year total) for one country.

    The first 12 rows of `country_df` are paired with
    monthly_consumption_2020 and the remaining rows with
    monthly_consumption_2021. The total is sum(2020) + sum(2021).
    """
    monthly_2020 = yearly_energy_consumption_per_country(monthly_consumption_2020, country_df[:12])
    monthly_2021 = yearly_energy_consumption_per_country(monthly_consumption_2021, country_df[12:])
    return monthly_2020, monthly_2021, sum(monthly_2020) + sum(monthly_2021)


# Suffix key: mc = Mainland China, us = United States, kz = Kazakhstan,
# ru = Russian Federation, ma = Malaysia, ca = Canada, de = Germany,
# ir = Ireland (not Iran), in = Iran, ot = Other.
y2020_mc, y2021_mc, y202021_mc = _energy_by_year(mainland_china_df)
y2020_us, y2021_us, y202021_us = _energy_by_year(us_df)
y2020_kz, y2021_kz, y202021_kz = _energy_by_year(kazakhstan_df)
y2020_ru, y2021_ru, y202021_ru = _energy_by_year(russian_fed_df)
y2020_ma, y2021_ma, y202021_ma = _energy_by_year(malaysia_df)
y2020_ca, y2021_ca, y202021_ca = _energy_by_year(ca_df)
y2020_de, y2021_de, y202021_de = _energy_by_year(germany_df)
y2020_ir, y2021_ir, y202021_ir = _energy_by_year(ireland_df)
y2020_in, y2021_in, y202021_in = _energy_by_year(iran_df)
y2020_ot, y2021_ot, y202021_ot = _energy_by_year(other_df)

# two-year total over all ten entries (same left-to-right order as before)
y202021_all = (
    y202021_mc + y202021_us + y202021_kz + y202021_ru + y202021_ma
    + y202021_ca + y202021_de + y202021_ir + y202021_in + y202021_ot
)

y202021_all

156.13809205374955

In [121]:
sum(monthly_consumption_2020)+sum(monthly_consumption_2021)

156.14022395439997

In [122]:
y202021_mc

71.7250156216264

In [123]:
[y202021_mc, y202021_us, y202021_kz, y202021_ru, y202021_ma, y202021_ca, y202021_de, y202021_ir, y202021_in, y202021_ot]

[71.7250156216264,
 25.963966746125177,
 13.37499680476428,
 11.11868112815799,
 6.78757601962203,
 5.249670556910369,
 3.48362798009454,
 2.907433206867329,
 4.906538066505741,
 10.62058592307571]

In [124]:
[sum(y2020_mc), sum(y2020_us), sum(y2020_kz), sum(y2020_ru), sum(y2020_ma), sum(y2020_ca), sum(y2020_de), sum(y2020_ir), sum(y2020_in), sum(y2020_ot)]

[43.469713433911,
 4.5221125079946995,
 3.10855216642371,
 4.60757038612486,
 3.23533795156235,
 0.7824390071757501,
 0.64898840531312,
 0.5988769385834499,
 2.26689688473909,
 3.8974926503864005]

In [125]:
# NOTE: element order here is mc, us, kz, ru, ma, ca, in (Iran), de (Germany), ir (Ireland), ot.
# Iran comes BEFORE Germany and Ireland, unlike the two preceding lists (ca, de, ir, in, ot),
# so do not compare the lists position by position.
[sum(y2021_mc), sum(y2021_us), sum(y2021_kz), sum(y2021_ru), sum(y2021_ma), sum(y2021_ca), sum(y2021_in), sum(y2021_de), sum(y2021_ir), sum(y2021_ot)]

[28.255302187715408,
 21.441854238130478,
 10.26644463834057,
 6.51111074203313,
 3.5522380680596797,
 4.4672315497346196,
 2.6396411817666503,
 2.83463957478142,
 2.3085562682838794,
 6.72309327268931]

In [126]:
all_geo_dist_df = pd.read_csv('Geographic Distribution 2020-2021 All Countries Breakdown.csv')
# convert the share column to floats with the same helper used for the top-10 table
all_geo_dist_df['% monthly_hashrate share'] = all_geo_dist_df['% monthly_hashrate share'].map(percent_to_float)
all_geo_dist_df

Unnamed: 0,date,WB country,% monthly_hashrate share
0,2020-01-01 00:00:00,Australia,0.003
1,2020-01-01 00:00:00,Iraq,0.001
2,2020-01-01 00:00:00,Syria,0.001
3,2020-01-01 00:00:00,Cameroon,0.002
4,2020-01-01 00:00:00,Niger,0.001
...,...,...,...
3056,2021-12-01 00:00:00,Hungary,0.029
3057,2021-12-01 00:00:00,Vietnam,0.025
3058,2021-12-01 00:00:00,Spain,0.024
3059,2021-12-01 00:00:00,Laos,0.023


In [127]:
def all_country_geoinfo(country_name, df):
    """Return the rows of `df` whose 'WB country' column equals `country_name`.

    Same idea as country_geoinfo, but keyed on the 'WB country' column of
    the all-countries table. The result has a fresh 0..n-1 index and `df`
    is not modified.
    """
    matches = df['WB country'] == country_name
    return df[matches].reset_index(drop=True)
# Singapore's rows, with the share column renamed to 'monthly_hashrate_%'
# (the column name yearly_energy_consumption_per_country reads)
singapore_df = all_country_geoinfo("Singapore", all_geo_dist_df).rename(
    columns={'% monthly_hashrate share': 'monthly_hashrate_%'}
)
singapore_df

Unnamed: 0,date,WB country,monthly_hashrate_%
0,2020-01-01 00:00:00,Singapore,0.056
1,2020-02-01 00:00:00,Singapore,0.066
2,2020-03-01 00:00:00,Singapore,0.07
3,2020-04-01 00:00:00,Singapore,0.067
4,2020-05-01 00:00:00,Singapore,0.109
5,2020-06-01 00:00:00,Singapore,0.106
6,2020-07-01 00:00:00,Singapore,0.108
7,2020-08-01 00:00:00,Singapore,0.077
8,2020-09-01 00:00:00,Singapore,0.675
9,2020-10-01 00:00:00,Singapore,0.816


In [128]:
# Singapore: first 12 rows pair with the 2020 monthly totals, the rest with 2021
y2020_sp = yearly_energy_consumption_per_country(
    monthly_consumption=monthly_consumption_2020,
    country_df=singapore_df[:12],
)
y2021_sp = yearly_energy_consumption_per_country(
    monthly_consumption=monthly_consumption_2021,
    country_df=singapore_df[12:],
)
# two-year total
y202021_sp = sum(y2020_sp) + sum(y2021_sp)
y202021_sp

1.2516866805562419

In [129]:
print(sum(y2020_sp))
print(sum(y2021_sp))

0.29135102487713
0.9603356556791119


In [130]:
68.72 - (44.45 + 4.65 + 3.18 + 4.71 + 3.31 + 0.80 + 2.33 + 0.67 + 0.62 + 0.31)

3.6899999999999977

In [131]:
104.04 - (32.89 + 25.20 + 12.06 + 7.59 + 4.13 + 5.25 + 3.06 + 3.31 + 2.69 + 1.13)

6.730000000000004

In [132]:
104.04 - (32.89 + 25.20 + 12.06 + 7.59 + 4.13 + 5.25 + 3.06 + 3.31 + 2.69 + 1.13) + 68.72 - (44.45 + 4.65 + 3.18 + 4.71 + 3.31 + 0.80 + 2.33 + 0.67 + 0.62 + 0.31)

10.420000000000002

In [133]:
# Thailand's rows, with the share column renamed to 'monthly_hashrate_%'
# (the column name yearly_energy_consumption_per_country reads)
thailand_df = all_country_geoinfo("Thailand", all_geo_dist_df).rename(
    columns={'% monthly_hashrate share': 'monthly_hashrate_%'}
)
thailand_df

Unnamed: 0,date,WB country,monthly_hashrate_%
0,2020-01-01 00:00:00,Thailand,0.548
1,2020-02-01 00:00:00,Thailand,0.336
2,2020-03-01 00:00:00,Thailand,0.262
3,2020-04-01 00:00:00,Thailand,0.288
4,2020-05-01 00:00:00,Thailand,0.425
5,2020-06-01 00:00:00,Thailand,0.418
6,2020-07-01 00:00:00,Thailand,0.439
7,2020-08-01 00:00:00,Thailand,0.485
8,2020-09-01 00:00:00,Thailand,0.392
9,2020-10-01 00:00:00,Thailand,0.407


In [134]:
# Thailand: first 12 rows pair with the 2020 monthly totals, the rest with 2021
y2020_tl = yearly_energy_consumption_per_country(
    monthly_consumption=monthly_consumption_2020,
    country_df=thailand_df[:12],
)
y2021_tl = yearly_energy_consumption_per_country(
    monthly_consumption=monthly_consumption_2021,
    country_df=thailand_df[12:],
)
y202021_tl = sum(y2020_tl) + sum(y2021_tl)
# printed in order: two-year total, 2020 total, 2021 total
print(y202021_tl, sum(y2020_tl), sum(y2021_tl), sep='\n')

1.040687949689908
0.289309136616272
0.751378813073636


In [135]:
# Norway's rows, with the share column renamed to 'monthly_hashrate_%'
# (the column name yearly_energy_consumption_per_country reads)
norway_df = all_country_geoinfo("Norway", all_geo_dist_df).rename(
    columns={'% monthly_hashrate share': 'monthly_hashrate_%'}
)
norway_df

Unnamed: 0,date,WB country,monthly_hashrate_%
0,2020-01-01 00:00:00,Norway,1.011
1,2020-02-01 00:00:00,Norway,0.968
2,2020-03-01 00:00:00,Norway,0.857
3,2020-04-01 00:00:00,Norway,0.476
4,2020-05-01 00:00:00,Norway,0.457
5,2020-06-01 00:00:00,Norway,0.389
6,2020-07-01 00:00:00,Norway,0.845
7,2020-08-01 00:00:00,Norway,0.794
8,2020-09-01 00:00:00,Norway,0.885
9,2020-10-01 00:00:00,Norway,0.827


In [109]:
# Norway: first 12 rows pair with the 2020 monthly totals, the rest with 2021
y2020_nw = yearly_energy_consumption_per_country(
    monthly_consumption=monthly_consumption_2020,
    country_df=norway_df[:12],
)
y2021_nw = yearly_energy_consumption_per_country(
    monthly_consumption=monthly_consumption_2021,
    country_df=norway_df[12:],
)
y202021_nw = sum(y2020_nw) + sum(y2021_nw)
# printed in order: two-year total, 2020 total, 2021 total
print(y202021_nw, sum(y2020_nw), sum(y2021_nw), sep='\n')

1.270337217469767
0.601206834881085
0.669130382588682


In [110]:
mainland_china_df

Unnamed: 0,date,country,monthly_hashrate_%,monthly_absolute_hashrate_EH/S
0,2020-01-01,Mainland China,72.69,79.41
1,2020-02-01,Mainland China,72.98,80.59
2,2020-03-01,Mainland China,67.06,71.65
3,2020-04-01,Mainland China,64.81,72.19
4,2020-05-01,Mainland China,59.48,62.89
5,2020-06-01,Mainland China,64.67,72.19
6,2020-07-01,Mainland China,66.86,81.71
7,2020-08-01,Mainland China,66.86,82.71
8,2020-09-01,Mainland China,67.12,91.1
9,2020-10-01,Mainland China,67.38,88.92


In [137]:
38963.88+2000

40963.88

In [138]:
(38963.88+2000)*0.26

10650.6088

In [139]:
6267/2

3133.5

In [141]:
(38963.88+2000)-6267/2

37830.38