In [1]:
import os
import heapq
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from meta.scripts.utils.pandas_utils import dump_tsv

# Display titles for the plots; the plotting cells look them up by column position.
TITLES = ("Capacity", "Internal Resistance", "Maximal Voltage")
# Read each measurement file, index it by its "no" column and stack the rows of all
# files into one frame (sort=False: the columns are not sorted while aligning).
cell_frames = []
for csv_name in ("18650-1.csv", "18650-3.csv"):
    cell_frames.append(pd.read_csv(csv_name).set_index("no"))
cells_df = pd.concat(cell_frames, axis=0, sort=False)

In [2]:
sns.set(style="whitegrid")
fig = plt.figure(figsize=(10, 10))

# One "hls" colour per column, so every subplot is drawn in its own hue.
palette = sns.color_palette("hls", len(cells_df.columns))

for idx, column in enumerate(cells_df.columns):
    # Subplot numbers are 1-based and fill the 2x2 grid row by row.
    ax = fig.add_subplot(2, 2, idx + 1)
    # Histogram with a kernel density estimate overlaid, drawn onto this subplot.
    sns.histplot(data=cells_df[column], ax=ax, color=palette[idx], kde=True)
    # Widen the upper x-limit by 3.3 % (presumably to leave some room on the right).
    x_low, x_high = ax.get_xlim()
    ax.set_xlim([x_low, 1.033 * x_high])
    ax.set_title("Data distribution for {}".format(TITLES[idx]), fontsize="larger")

fig.suptitle("VariCore NCR18650b charts", fontsize="x-large", weight="semibold")
plt.tight_layout()
# The figure is written to disk instead of being shown, then cleared and closed.
plt.savefig("hist_plot.png", dpi=300)
plt.clf()
plt.close()

In [3]:
fig = plt.figure(figsize=(10, 10))

# One "hls" colour per column, so every subplot is drawn in its own hue.
palette = sns.color_palette("hls", len(cells_df.columns))

for idx, column in enumerate(cells_df.columns):
    # Subplot numbers are 1-based and fill the 2x2 grid row by row.
    ax = fig.add_subplot(2, 2, idx + 1)
    data = cells_df[column]
    # The label shows the column mean rounded down to a whole number.
    # NOTE(review): flooring discards the fractional part; confirm this is intended
    # for columns whose values are not whole numbers.
    mean = int(np.floor(data.mean()))
    sns.boxplot(y=data, ax=ax, color=palette[idx])
    label = "{} mean = {}".format(column, mean)
    # Anchor the label just off the lower y-limit (scaled by 1.005), centred on each x tick.
    y_bottom = ax.get_ylim()[0]
    for xtick in ax.get_xticks():
        ax.text(x=xtick, y=y_bottom * 1.005, s=label,
                fontstyle="italic", fontsize="larger", ha="center", va="center",
                weight="light")
    ax.set_title("Data description for {}".format(TITLES[idx]), fontsize="larger")

fig.suptitle("VariCore NCR18650b charts", fontsize="x-large", weight="semibold")
plt.tight_layout()
# The figure is written to the current working directory instead of being shown,
# then cleared and closed.
plt.savefig(os.path.join(os.getcwd(), "box_plot.png"), dpi=300)
plt.clf()
plt.close()

In [4]:
def sublist_creator(d: dict, n: int):
    """Greedily distribute the items of ``d`` over ``n`` bins with similar value sums.

    Items are visited in the dict's iteration order (no sorting happens here).
    Each item is placed into the bin whose running total is currently the
    smallest; ties are resolved in favour of the lowest bin index.

    Based on: https://stackoverflow.com/questions/61648065/split-list-into-n-sublists-with-approximately-equal-sums

    :param d: mapping of item key -> numeric value to balance on.
    :param n: number of bins to create.
    :return: dict with two entries, ``"keys"`` and ``"values"``; each is a list
        of ``n`` lists holding, per bin, the assigned keys and their values.
    """
    key_bins = [[] for _ in range(n)]
    value_bins = [[] for _ in range(n)]
    # Min-heap of (running total, bin index): the root is always the lightest bin.
    bin_heap = [(0, bin_idx) for bin_idx in range(n)]
    heapq.heapify(bin_heap)
    for key, value in d.items():
        running_total, bin_idx = heapq.heappop(bin_heap)
        key_bins[bin_idx].append(key)
        value_bins[bin_idx].append(value)
        # Put the bin back with its updated total so that it is re-ranked.
        heapq.heappush(bin_heap, (running_total + value, bin_idx))
    return {"keys": key_bins, "values": value_bins}

# Keep the 65 cells with the highest capacity, i.e. remove the cells with the
# lowest capacity.
# The cells described in "18650-2.csv" were from another package
# (NOTE(review): presumably the reason that file is not part of cells_df).
build_df = cells_df.sort_values("mAh", ascending=False).head(65)

# Spread the cells over 13 groups with approximately equal capacity sums.
# The cells are keyed by row position instead of by their "no" label: cells_df is
# concatenated from several CSV files, so the labels are not guaranteed to be
# unique. Series.to_dict() would silently collapse duplicated labels (dropping
# cells from the assignment) and .loc[] would return every row sharing a label.
# With unique labels the result is identical to label-based selection.
_cell_labels = build_df.index.tolist()
_groups = sublist_creator(dict(enumerate(build_df["mAh"].tolist())), 13)
# Same structure as sublist_creator's result, with row positions mapped back to
# the cell labels.
sub_lists = dict(keys=[[_cell_labels[pos] for pos in group] for group in _groups["keys"]],
                 values=_groups["values"])
# One frame per group, rows in the order in which the cells were assigned.
sub_dfs = [build_df.iloc[group, :] for group in _groups["keys"]]

In [5]:
def _summarize_group(group_df):
    """Build the summary record for one group of cells.

    The record holds the sum and mean of the "mAh" and "mOhm" columns plus the
    group's index labels joined into one comma-separated string.
    """
    capacities = group_df["mAh"]
    resistances = group_df["mOhm"]
    return {
        "mAh_sum": sum(capacities),
        "mAh_mean": np.mean(capacities),
        "mOhm_sum": sum(resistances),
        "mOhm_mean": np.mean(resistances),
        "cells": ",".join(map(str, group_df.index)),
    }


# One row per group, in the order of sub_dfs.
assembly_df = pd.DataFrame([_summarize_group(group_df) for group_df in sub_dfs])

# Hand the table to the project's dump_tsv helper, targeting the current working directory.
dump_tsv(assembly_df, os.path.join(os.getcwd(), "assembly.tsv"))