# Goji Data Compilation

<a href="https://colab.research.google.com/github/jasmine-schoch/goji-data-analysis/blob/main/goji_data_compilation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import needed libraries and define constants

In [9]:
import pandas as pd
import numpy as np
import math

# Google Sheets document whose rows link to each individual company's sheet.
DATA_SOURCES_GOOGLE_SHEETS = (
    "https://docs.google.com/spreadsheets/d/"
    "1oEaZVl3YF6JCxS63wsk10HO2dCVFs_CyfZ-cmlMFU0c"
    "/edit?usp=sharing"
)
# Appended in place of the trailing "/edit..." segment to download a sheet as CSV.
GOOGLE_SHEETS_TO_CSV_SUFFIX = "/export?format=csv"

## Initialize Variables and Data

### Define output data frame and columns

In [10]:
# Output table: one row per score category, one column per company.
# It starts with no columns; the company columns are added later in the notebook.
Final = pd.DataFrame(
    index=[
        "Total",
        "Carbon Emmisions",
        "Water Usage",
        "Ethical Sourcing",
        "Labor Rights",
        "Transparency & Policy",
        "DEI",
    ]
)

### Load the data source URLs into a data frame

In [7]:
# Drop everything from the last "/" onward (the "/edit?..." part of the sharing
# link) and append the CSV export suffix in its place.
last_slash = DATA_SOURCES_GOOGLE_SHEETS.rindex("/")
csv_url = f"{DATA_SOURCES_GOOGLE_SHEETS[:last_slash]}{GOOGLE_SHEETS_TO_CSV_SUFFIX}"

# The sheet is read without a header row, so columns are addressed by position.
data_source_urls_df = pd.read_csv(csv_url, header=None)

In [203]:
# Creating data table of data from all the companies.
#
# Every company sheet is read with the same fixed layout: the scores sit in
# column position 9, and the row positions below select the cells that feed
# each category. Several rows are summed when a category spans multiple cells.
SCORE_COLUMN = 9
COMPANY_NAME_CELL = (2, 2)  # cell whose value is used as the column label in `Final`
CATEGORY_ROWS = {
    "Total": [68],
    "Carbon Emmisions": [84],
    "Water Usage": [85],
    "Ethical Sourcing": [80, 81],
    "Labor Rights": [71, 72, 73, 76, 79, 92],
    # NOTE(review): row 92 is also summed into "Labor Rights" above — confirm
    # that counting it in both categories is intended.
    "Transparency & Policy": [69, 75, 83, 87, 89, 91, 92],
    "DEI": [90],
}

# Scores are assigned to `Final` positionally, so the category order here must
# match the row order of `Final`.
assert list(CATEGORY_ROWS) == list(Final.index), "CATEGORY_ROWS must follow Final.index order"


def _company_scores(sheet):
    """Return one company's category scores as a list of ints.

    Parameters
    ----------
    sheet : pandas.DataFrame
        A company sheet read with ``header=None``.

    Returns
    -------
    list of int
        One value per entry of ``CATEGORY_ROWS``, in that order; each value is
        the sum of the integer-converted cells listed for the category.

    Raises
    ------
    ValueError
        If a selected cell cannot be converted with ``int``.
    IndexError
        If the sheet is smaller than the expected layout.
    """
    return [
        sum(int(cell) for cell in sheet.iloc[rows, SCORE_COLUMN])
        for rows in CATEGORY_ROWS.values()
    ]


# Column 1 of the sources sheet holds each company's sheet URL.
for sheet_url in data_source_urls_df.iloc[:, 1]:
    # Turn the "edit" link for a specific tab (gid) into its CSV export link.
    export_url = sheet_url.replace("/edit#gid=", "/export?format=csv&gid=")
    company_sheet = pd.read_csv(export_url, header=None)
    Final[company_sheet.iloc[COMPANY_NAME_CELL]] = _company_scores(company_sheet)

In [204]:
# Display the raw scores gathered above: one row per category, one column per company.
Final

Unnamed: 0,A&F,Adidas Group,American Eagle Outfitters,Allbirds,Amazon,URBN,Besteller,Boohoo,Boyish Jeans,Brother Vellies,...,The Children's Place,The Edinburgh Woollen Mill,TJX,Under Armour,Fast Retailing (Uniqlo),VEJA,VF Corp,Victoria's Secret & Co,Walmart,Zalando
Total,-2,25,-4,15,2,3,12,13,40,27,...,-10,-10,-11,0,24,52,15,6,-1,18
Carbon Emmisions,1,0,0,1,1,0,0,1,0,0,...,1,0,1,0,1,1,3,0,0,1
Water Usage,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,2,2,1,0,0,0
Ethical Sourcing,1,4,0,2,1,0,4,0,15,0,...,0,0,0,0,2,3,7,0,1,2
Labor Rights,-7,6,1,2,-8,1,2,7,6,0,...,-9,-5,-8,-7,6,12,-8,0,-7,1
Transparency & Policy,-7,5,0,10,-4,2,5,11,22,12,...,-8,-5,-10,-6,8,23,0,1,-4,9
DEI,0,7,0,0,0,5,4,0,0,15,...,0,0,0,3,2,9,0,4,4,4


In [205]:
# Keep an independent snapshot of the raw scores; `Final` itself is overwritten
# with percentile rankings further down.
actFinal = Final.copy(deep=True)

In [206]:
# Finding percentile rankings.
#
# For every category row, each company's score is replaced by the percentage of
# companies (out of all companies, itself included in the denominator) whose
# score is strictly lower, rounded to the nearest integer.
#
# NOTE: this cell overwrites `Final` in place. Re-running it without rebuilding
# `Final` first would rank the already-ranked values and re-average "Total".
company_count = Final.shape[1]

# rank(method="min") is 1 + the number of strictly smaller values in the row,
# so subtracting 1 gives the count of companies each one beats (ties excluded).
beaten = Final.rank(axis=1, method="min") - 1
percentiles = (beaten / company_count * 100).round().astype("int64")

# The "Total" row (position 0) is not the percentile of the raw total: it is the
# rounded mean of the company's category percentiles (every row below it).
category_percentiles = percentiles.iloc[1:]
percentiles.iloc[0] = (
    (category_percentiles.sum() / len(category_percentiles))
    .round()
    .astype("int64")
    .to_numpy()
)

# Write the values back into the existing frame so `Final` keeps its identity.
Final.iloc[:, :] = percentiles.to_numpy()

In [212]:
# Reorder the rows for export: the six category rows first, "Total" moved last.
Reversed = Final.loc[
    [
        "Carbon Emmisions", "Water Usage", "Ethical Sourcing",
        "Labor Rights", "Transparency & Policy", "DEI",
        "Total",
    ],
    :,
]

In [214]:
# Saving final table to json
# Don't forget to export file from sidebar!!
# `index` expects a boolean. The previous string "true" only worked because any
# non-empty string is truthy (so "false" would have kept the index as well).
Reversed.to_json("percentile.json", orient="split", compression="infer", index=True)

In [211]:
# Display `Final` again: the percentile cell overwrote it in place, so these values
# are percentile rankings (the raw scores are kept in `actFinal`).
Final

Unnamed: 0,A&F,Adidas Group,American Eagle Outfitters,Allbirds,Amazon,URBN,Besteller,Boohoo,Boyish Jeans,Brother Vellies,...,The Children's Place,The Edinburgh Woollen Mill,TJX,Under Armour,Fast Retailing (Uniqlo),VEJA,VF Corp,Victoria's Secret & Co,Walmart,Zalando
Total,16,45,14,35,30,28,38,33,42,35,...,10,8,9,14,56,75,47,24,21,42
Carbon Emmisions,45,0,0,45,45,0,0,45,0,0,...,45,0,45,0,45,45,86,0,0,45
Water Usage,0,0,0,0,73,0,0,0,0,0,...,0,0,0,0,73,73,68,0,0,0
Ethical Sourcing,27,61,0,39,27,0,61,0,96,0,...,0,0,0,0,39,52,86,0,27,39
Labor Rights,14,68,46,54,7,46,54,80,68,39,...,2,27,7,14,68,91,7,39,14,46
Transparency & Policy,12,52,36,71,27,45,52,73,91,75,...,11,20,0,18,62,95,36,43,27,66
DEI,0,89,0,0,0,77,59,0,0,98,...,0,0,0,54,52,95,0,59,59,59
