In [1]:
from datascience import *
from scipy.stats import pearsonr
import numpy as np
import seaborn as sns
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

In [2]:
# Read the textiles CSV into a datascience Table and display it.
TEXTILES_CSV = "Plastic-based-Textiles-in-clothing-industry.csv"
fashion_textiles = Table.read_table(TEXTILES_CSV)
fashion_textiles

Company,Product_Type,Production_Year,Greenhouse_Gas_Emissions,Pollutants_Emitted,Water_Consumption,Energy_Consumption,Waste_Generation,Sales_Revenue
Zara,Polyester,2020,5000,20,7500,1200,300,500000
Zara,Nylon,2019,3000,15,5000,900,200,450000
Zara,Recycled_Poly,2021,3500,18,6000,1100,250,480000
Zara,Cotton,2018,2000,10,4500,800,180,550000
Zara,Synthetic_Blend,2022,6000,25,8000,1500,350,600000
Zara,Organic_Cotton,2019,2500,12,4000,700,150,420000
Zara,Polyester,2021,4500,22,7000,1300,280,550000
Zara,Recycled_Poly,2020,3800,16,5500,1000,220,510000
Zara,Microfiber,2022,4800,19,6800,1100,270,530000
Zara,Linen,2019,2100,11,4800,850,200,470000


In [10]:
# Step 1: Append natural-log versions of the numeric measures to the table.
# Each pair is (new column label, existing source column label); the list
# order is the order in which the new columns are added.
log_column_pairs = [
    ('log_GHG', 'Greenhouse_Gas_Emissions'),
    ('log_Pollutants_Emitted', 'Pollutants_Emitted'),
    ('log_Water_Consumption', 'Water_Consumption'),
    ('log_Energy_Consumption', 'Energy_Consumption'),
    ('log_Sales_Revenue', 'Sales_Revenue'),
    ('log_Waste_Generation', 'Waste_Generation'),
]

# with_columns takes alternating label/value arguments, so flatten the pairs.
log_column_args = []
for log_label, source_label in log_column_pairs:
    log_column_args.append(log_label)
    log_column_args.append(np.log(fashion_textiles.column(source_label)))

fashion_textiles = fashion_textiles.with_columns(*log_column_args)

# Step 2: Convert to a pandas DataFrame and display it.
df_fashion_textiles = fashion_textiles.to_df()
df_fashion_textiles

Unnamed: 0,Company,Product_Type,Production_Year,Greenhouse_Gas_Emissions,Pollutants_Emitted,Water_Consumption,Energy_Consumption,Waste_Generation,Sales_Revenue,log_GHG,log_Pollutants_Emitted,log_Water_Consumption,log_Energy_Consumption,log_Sales_Revenue,log_Waste_Generation
0,Zara,Polyester,2020,5000,20,7500,1200,300,500000,8.517193,2.995732,8.922658,7.090077,13.122363,5.703782
1,Zara,Nylon,2019,3000,15,5000,900,200,450000,8.006368,2.708050,8.517193,6.802395,13.017003,5.298317
2,Zara,Recycled_Poly,2021,3500,18,6000,1100,250,480000,8.160518,2.890372,8.699515,7.003065,13.081541,5.521461
3,Zara,Cotton,2018,2000,10,4500,800,180,550000,7.600902,2.302585,8.411833,6.684612,13.217674,5.192957
4,Zara,Synthetic_Blend,2022,6000,25,8000,1500,350,600000,8.699515,3.218876,8.987197,7.313220,13.304685,5.857933
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6951,Adidas,Tencel,2018,3218,21,7536,902,192,523173,8.076515,3.044522,8.927447,6.804615,13.167667,5.257495
6952,Urban Outfitters,Microfiber,2019,2622,11,5267,952,170,436096,7.871693,2.397895,8.569216,6.858565,12.985618,5.135798
6953,Urban Outfitters,Organic_Cotton,2019,5920,24,4321,982,177,489765,8.686092,3.178054,8.371242,6.889591,13.101681,5.176150
6954,Adidas,Microfiber,2020,2897,16,4125,1085,321,487443,7.971431,2.772589,8.324821,6.989335,13.096929,5.771441


In [11]:
# Intensity columns: each logged measure divided by logged sales revenue.
# NOTE(review): this is a ratio of logs, log(x) / log(revenue), which is not the
# same as log(x / revenue) = log(x) - log(revenue). Confirm which was intended.
log_revenue = df_fashion_textiles['log_Sales_Revenue']
df_fashion_textiles['log_Energy_Intensity'] = (
    df_fashion_textiles['log_Energy_Consumption'].div(log_revenue)
)
df_fashion_textiles['log_Waste_Generation_Intensity'] = (
    df_fashion_textiles['log_Waste_Generation'].div(log_revenue)
)

In [12]:
# Display the frame again, now including the two intensity columns.
df_fashion_textiles

Unnamed: 0,Company,Product_Type,Production_Year,Greenhouse_Gas_Emissions,Pollutants_Emitted,Water_Consumption,Energy_Consumption,Waste_Generation,Sales_Revenue,log_GHG,log_Pollutants_Emitted,log_Water_Consumption,log_Energy_Consumption,log_Sales_Revenue,log_Waste_Generation,log_Energy_Intensity,log_Waste_Generation_Intensity
0,Zara,Polyester,2020,5000,20,7500,1200,300,500000,8.517193,2.995732,8.922658,7.090077,13.122363,5.703782,0.540305,0.434661
1,Zara,Nylon,2019,3000,15,5000,900,200,450000,8.006368,2.708050,8.517193,6.802395,13.017003,5.298317,0.522578,0.407031
2,Zara,Recycled_Poly,2021,3500,18,6000,1100,250,480000,8.160518,2.890372,8.699515,7.003065,13.081541,5.521461,0.535339,0.422080
3,Zara,Cotton,2018,2000,10,4500,800,180,550000,7.600902,2.302585,8.411833,6.684612,13.217674,5.192957,0.505733,0.392880
4,Zara,Synthetic_Blend,2022,6000,25,8000,1500,350,600000,8.699515,3.218876,8.987197,7.313220,13.304685,5.857933,0.549673,0.440291
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6951,Adidas,Tencel,2018,3218,21,7536,902,192,523173,8.076515,3.044522,8.927447,6.804615,13.167667,5.257495,0.516767,0.399273
6952,Urban Outfitters,Microfiber,2019,2622,11,5267,952,170,436096,7.871693,2.397895,8.569216,6.858565,12.985618,5.135798,0.528166,0.395499
6953,Urban Outfitters,Organic_Cotton,2019,5920,24,4321,982,177,489765,8.686092,3.178054,8.371242,6.889591,13.101681,5.176150,0.525856,0.395075
6954,Adidas,Microfiber,2020,2897,16,4125,1085,321,487443,7.971431,2.772589,8.324821,6.989335,13.096929,5.771441,0.533662,0.440671


In [13]:
# For every product type, print the correlation (pandas' default method for
# Series.corr) between the energy-intensity and waste-intensity columns.
product_types = df_fashion_textiles['Product_Type'].unique()
for pt in product_types:
    is_pt = df_fashion_textiles['Product_Type'] == pt
    subset = df_fashion_textiles[is_pt]
    energy_intensity = subset['log_Energy_Intensity']
    waste_intensity = subset['log_Waste_Generation_Intensity']
    r = energy_intensity.corr(waste_intensity)
    print(pt, f"r = {r:.4f}")

Polyester r = 0.1726
Nylon r = 0.0345
Recycled_Poly r = -0.0083
Cotton r = -0.0114
Synthetic_Blend r = 0.0847
Organic_Cotton r = 0.0114
Microfiber r = 0.0784
Linen r = 0.0270
Tencel r = 0.0204
Viscose r = 0.0883
Wool r = 0.0501


In [15]:
# Print, per product type, the correlation between the energy-intensity and
# waste-intensity columns of the rows belonging to that type.
# NOTE(review): this repeats the per-product-type correlation loop from the
# earlier cell and prints the same values; consider keeping only one copy.
for pt in df_fashion_textiles['Product_Type'].unique():
    subset = df_fashion_textiles.loc[df_fashion_textiles['Product_Type'] == pt]
    r = subset['log_Energy_Intensity'].corr(
        subset['log_Waste_Generation_Intensity']
    )
    print(pt, f"r = {r:.4f}")

Polyester r = 0.1726
Nylon r = 0.0345
Recycled_Poly r = -0.0083
Cotton r = -0.0114
Synthetic_Blend r = 0.0847
Organic_Cotton r = 0.0114
Microfiber r = 0.0784
Linen r = 0.0270
Tencel r = 0.0204
Viscose r = 0.0883
Wool r = 0.0501
