# Clash of Clans: How many builders do you *really* need?

### (or, should I spend those green gems?)

This next section contains a bit of Python used to prepare the dataset for visualization and analysis. If you aren't interested in the details, just skip down to the results section.

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd

In [72]:
# Load the raw tables and drop every town hall 11 row from both.
# NOTE(review): presumably the level-11 data is incomplete — confirm the reason.
# .copy() makes each filtered frame independent of the frame it was sliced
# from, so adding columns to it later does not raise SettingWithCopyWarning.
building_df = pd.read_csv("building_upgrade_data.csv")
building_df = building_df[building_df["town_hall"] != 11].copy()
research_df = pd.read_csv("research_data.csv")
research_df = research_df[research_df["town_hall"] != 11].copy()

In [73]:
# CONSTANTS
# Each value answers "how many of this unit make up one day?".
HOURS_PER_DAY = 24.0
MIN_PER_DAY = 60 * HOURS_PER_DAY
SEC_PER_DAY = 60 * MIN_PER_DAY

# Unit name -> units per day; dividing an amount by the entry yields days.
UNIT_MAP = dict(
    seconds=SEC_PER_DAY,
    minutes=MIN_PER_DAY,
    hours=HOURS_PER_DAY,
    days=1.0,
)

In [74]:
# These functions parse the possible time strings
from functools import reduce

def parse_time(t):
    """Convert one ``(amount, unit)`` pair into a number of days.

    ``t[0]`` is the integer amount as text and ``t[1]`` is a key of
    ``UNIT_MAP`` (for example ``["12", "hours"]`` gives ``0.5``).
    """
    amount = int(t[0])
    units_per_day = UNIT_MAP[t[1]]
    return amount / units_per_day

def chunks(l, n):
    """Lazily yield consecutive slices of ``l`` that are ``n`` items long.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    yield from (l[start:start + n] for start in range(0, len(l), n))

def parse_time_string(s):
    """Convert a duration such as ``"1 days 12 hours"`` into fractional days.

    The string is read as alternating ``<amount> <unit>`` tokens; each pair
    is converted by ``parse_time`` and the results are added together.

    Tokens are split on any run of whitespace, so leading, trailing or
    repeated spaces no longer produce empty tokens that fail to parse.

    Raises
    ------
    ValueError
        If ``s`` contains no tokens at all (empty or whitespace only).
    """
    tokens = s.split()
    if not tokens:
        # Blank input used to surface as a ValueError from int(''); keep the
        # exception type but make the cause explicit.
        raise ValueError("empty time string: %r" % (s,))
    durations = map(parse_time, chunks(tokens, 2))
    return reduce(lambda total, part: total + part, durations)

In [75]:
# Turn the textual durations into fractional days so they can be summed.
building_df["build_days"] = building_df["build_time"].apply(parse_time_string)
research_df["research_days"] = research_df["research_time"].apply(parse_time_string)

In [76]:
def get_build_time(df):
    """Total build time (in days) required at each town hall level.

    For every building type, the build days of the rows listed under a town
    hall are summed and multiplied by the quantity of that structure at that
    town hall. Whenever the quantity grows compared with the previous town
    hall of the same type, the additional structures are also charged the
    build days of every row listed under lower town halls (catch-up time).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns "type", "town_hall", "quantity" and
        "build_days". The quantity is read from the first row of each
        (type, town_hall) group, so it is assumed constant within a group.

    Returns
    -------
    pandas.Series
        Accumulated build days indexed by town hall level.
    """
    build_time = {}
    # A plain string key (instead of a one-element list) keeps the group
    # label a scalar on every pandas version; the label itself is unused.
    for _, type_rows in df.groupby("type"):
        # The baseline quantity comes from the lowest town hall of this type
        # (groupby iterates keys in sorted order), not from whichever row
        # happens to be physically first, so unsorted input is handled.
        prev_quant = None
        for town_hall, level_rows in type_rows.groupby("town_hall"):
            quant = level_rows["quantity"].iloc[0]
            if prev_quant is None:
                prev_quant = quant

            total = build_time.get(town_hall, 0)
            total += quant * level_rows["build_days"].sum()

            # This adds time to each town hall level based on new structure
            # acquisition: new copies must also be built up through every
            # level that belongs to earlier town halls.
            if quant > prev_quant:
                earlier_rows = type_rows[type_rows["town_hall"] < town_hall]
                total += (quant - prev_quant) * earlier_rows["build_days"].sum()
                prev_quant = quant

            build_time[town_hall] = total
    return pd.Series(build_time)

In [77]:
# Total build days per town hall level, accumulated over all building types.
build_times = get_build_time(building_df)

In [78]:
# Get research times by town hall, don't forget to add lab upgrade time.
# .copy() gives an independent frame, so adding "total_time" below does not
# raise SettingWithCopyWarning.
lab_build_days = (
    building_df.groupby("type")
    .get_group("laboratory")[["town_hall", "build_days"]]
    .copy()
)
research_times = research_df.groupby("town_hall")["research_days"].sum()
# Look up each lab row's research total by its town hall instead of adding
# positionally via .values, which silently relied on both tables having the
# same length and order. Town halls without research data become NaN.
lab_build_days["total_time"] = (
    lab_build_days["build_days"] + lab_build_days["town_hall"].map(research_times)
)
research_times = lab_build_days.set_index("town_hall")["total_time"]
# Side-by-side table aligned on town hall level.
times = pd.concat([research_times, build_times], axis=1)
times.columns = ["research_times", "build_times"]

In [79]:
# Research and build totals (in days) side by side, one row per town hall level.
times

Unnamed: 0,research_times,build_times
1,,0.12037
2,,1.012847
3,1.270833,6.432407
4,3.208333,23.502431
5,15.5,80.557986
6,11.0,58.583333
7,57.0,224.815162
8,157.0,453.761111
9,213.0,850.764583
10,258.0,902.783681
