In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

# 1. Data Preparation
# Original data from the document
data = {
    'Year': [2010, 2012, 2014, 2016, 2018, 2020],
    'Total ASP produced': [0.486, 0.519, 0.539, 0.591, 0.63, 0.714],
    'GHG emissions': [96.4, 98.1, 104.2, 108.2, 113.9, 127.8],
    'Emission intensity': [196.5, 188.9, 193.4, 182.9, 180.8, 179.1],
    'Total ASP supply': [5.54, 5.6, 5.49, 5.71, 5.77, 6.21]
}

df = pd.DataFrame(data)
df.set_index('Year', inplace=True)

# Display original data
print("Original Transposed Data:")
display(df)

# 2. Basic Statistics
stats = pd.DataFrame({
    'Minimum': df.min(),
    'Maximum': df.max(),
    'Standard Deviation': df.std(),
    'Mean': df.mean()
})

print("\nBasic Statistics:")
display(stats)

# 3. Data Standardization
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
X_scaled_df = pd.DataFrame(X_scaled, columns=df.columns, index=df.index)

print("\nStandardized Data (Centered and Reduced):")
display(X_scaled_df)

# 4. Correlation Matrix
correlation_matrix = X_scaled_df.corr()
print("\nCorrelation Matrix:")
display(correlation_matrix)

# 5. PCA Analysis
pca = PCA()
principal_components = pca.fit_transform(X_scaled_df)
explained_variance_ratio = pca.explained_variance_ratio_

# Eigenvalues
eigenvalues = pca.explained_variance_
print("\nEigenvalues:")
display(pd.DataFrame(eigenvalues, index=[f'λ{i+1}' for i in range(len(eigenvalues))], columns=['Value']))

# Variance explained by each axis
print("\nPercentage of Variance Explained:")
for i, ratio in enumerate(explained_variance_ratio):
    print(f"Axe {i+1}: {ratio*100:.4f}%")
print(f"Total variance explained (Axes 1+2): {sum(explained_variance_ratio[:2])*100:.4f}%")

# 6. Component Matrix (Loadings)
loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
loadings_df = pd.DataFrame(loadings[:, :2], index=df.columns, columns=['Axe 1', 'Axe 2'])

print("\nMatrix of Loadings (Saturations):")
display(loadings_df)

# 7. Correlation Circle
plt.figure(figsize=(8, 8))
for i, (x, y) in enumerate(loadings[:, :2]):
    plt.arrow(0, 0, x, y, color='b', alpha=0.5)
    plt.text(x*1.1, y*1.1, df.columns[i], color='b')
plt.xlim(-1.1, 1.1)
plt.ylim(-1.1, 1.1)
plt.grid(True)
plt.axhline(0, color='black',linewidth=0.5)
plt.axvline(0, color='black',linewidth=0.5)
plt.title('Correlation Circle (Axes 1-2: 97.9749%)')
circle = plt.Circle((0, 0), 1, color='black', fill=False)
plt.gca().add_artist(circle)
plt.xlabel('Axe 1 (92.7375%)')
plt.ylabel('Axe 2 (5.2374%)')
plt.savefig('correlation_circle.png')
plt.close()

# 8. Principal Components
principal_components_df = pd.DataFrame(principal_components[:, :2], index=df.index, columns=['Axe 1', 'Axe 2'])
print("\nPrincipal Components Matrix:")
display(principal_components_df)

# 9. Quality of Representation (cos^2)
cos2 = np.square(principal_components[:, :2]) / np.sum(np.square(principal_components), axis=1, keepdims=True)
cos2_df = pd.DataFrame(cos2, index=df.index, columns=['Axe 1', 'Axe 2'])
print("\nQuality of Representation (cos^2):")
display(cos2_df)

# 10. Selection of Years
print("\nSelected Years:")
print("Axe 1: 2010, 2012, 2020 (high cos^2 values)")
print("Axe 2: 2016 (highest cos^2 value)")

# 11. Graphical Representation
plt.figure(figsize=(10, 6))
for year in principal_components_df.index:
    x, y = principal_components_df.loc[year]
    color = 'purple' if year in [2010, 2012, 2020] else 'orange' if year == 2016 else 'blue'
    plt.scatter(x, y, c=color, s=100)
    plt.text(x+0.05, y+0.05, year)
plt.axhline(0, color='black', linewidth=0.5)
plt.axvline(0, color='black', linewidth=0.5)
plt.grid(True)
plt.title('Principal Components Plot (Axes 1-2)')
plt.xlabel('Axe 1 (92.7375%)')
plt.ylabel('Axe 2 (5.2374%)')
plt.savefig('principal_components_plot.png')
plt.close()

# 12. Conclusion
print("\nConclusion:")
print("Axe 1: Highlights the relationship between animal protein production, supply, and emission intensity over time.")
print("- 2010 and 2012 show lower animal protein production and supply compared to 2020.")
print("- 2020 shows significant increase in production and supply, but lower emission intensity, suggesting improved efficiency.")
print("Axe 2: Focuses on GHG emissions, with 2016 as an outlier with lower emissions.")
print("- The increase in production and consumption may strain natural resources, but reduced emission intensity suggests efforts toward sustainability.")

Original Transposed Data:


Unnamed: 0_level_0,Total ASP produced,GHG emissions,Emission intensity,Total ASP supply
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,0.486,96.4,196.5,5.54
2012,0.519,98.1,188.9,5.6
2014,0.539,104.2,193.4,5.49
2016,0.591,108.2,182.9,5.71
2018,0.63,113.9,180.8,5.77
2020,0.714,127.8,179.1,6.21



Basic Statistics:


Unnamed: 0,Minimum,Maximum,Standard Deviation,Mean
Total ASP produced,0.486,0.714,0.083444,0.579833
GHG emissions,96.4,127.8,11.608962,108.1
Emission intensity,179.1,196.5,7.105678,186.933333
Total ASP supply,5.49,6.21,0.261687,5.72



Standardized Data (Centered and Reduced):


Unnamed: 0_level_0,Total ASP produced,GHG emissions,Emission intensity,Total ASP supply
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,-1.23183,-1.104036,1.474843,-0.753497
2012,-0.798611,-0.94362,0.303191,-0.502331
2014,-0.536054,-0.368012,0.996932,-0.962801
2016,0.146594,0.009436,-0.621798,-0.041861
2018,0.65858,0.5473,-0.945544,0.209305
2020,1.761319,1.858932,-1.207624,2.051185



Correlation Matrix:


Unnamed: 0,Total ASP produced,GHG emissions,Emission intensity,Total ASP supply
Total ASP produced,1.0,0.99114,-0.905696,0.93166
GHG emissions,0.99114,1.0,-0.841585,0.931232
Emission intensity,-0.905696,-0.841585,1.0,-0.812063
Total ASP supply,0.93166,0.931232,-0.812063,1.0



Eigenvalues:


Unnamed: 0,Value
λ1,4.451441
λ2,0.25136
λ3,0.097154
λ4,4.5e-05



Percentage of Variance Explained:
Axe 1: 92.7384%
Axe 2: 5.2367%
Axe 3: 2.0240%
Axe 4: 0.0009%
Total variance explained (Axes 1+2): 97.9750%

Matrix of Loadings (Saturations):


Unnamed: 0,Axe 1,Axe 2
Total ASP produced,1.089553,0.017666
GHG emissions,1.071924,0.150822
Emission intensity,-1.010608,0.4182
Total ASP supply,1.045928,0.231105



Principal Components Matrix:


Unnamed: 0_level_0,Axe 1,Axe 2
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010,-2.27703,0.507359
2012,-1.286079,-0.290658
2014,-1.418621,0.258169
2016,0.357585,-0.529955
2018,1.174833,-0.504382
2020,3.449312,0.559466



Quality of Representation (cos^2):


Unnamed: 0_level_0,Axe 1,Axe 2
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010,0.946278,0.04698
2012,0.88333,0.045118
2014,0.858698,0.028439
2016,0.311898,0.685066
2018,0.825929,0.152233
2020,0.973344,0.025606



Selected Years:
Axe 1: 2010, 2012, 2020 (high cos^2 values)
Axe 2: 2016 (highest cos^2 value)

Conclusion:
Axe 1: Highlights the relationship between animal protein production, supply, and emission intensity over time.
- 2010 and 2012 show lower animal protein production and supply compared to 2020.
- 2020 shows significant increase in production and supply, but lower emission intensity, suggesting improved efficiency.
Axe 2: Focuses on GHG emissions, with 2016 as an outlier with lower emissions.
- The increase in production and consumption may strain natural resources, but reduced emission intensity suggests efforts toward sustainability.
