Import necessary packages:

In [1]:
import pandas as pd
import numpy as np
#import xlsxwriter
#import xlrd
from io import StringIO as sio
import matplotlib.pyplot as mplt
from plotnine import ggplot, aes, geom_line, geom_bar, geom_label, facet_grid, geom_point, geom_text, theme, element_rect
from plotnine import labs, scale_x_timedelta, scale_x_continuous, scale_y_discrete, annotate, scale_y_continuous
from sklearn.linear_model import LinearRegression

ModuleNotFoundError: No module named 'pandas'

Import the tab-separated .txt data, convert the column names to lower case, rename them to short identifiers, and print the head and tail of the data.

In [None]:
# Read the tab-separated cost table (the first 6 lines of the file are skipped),
# lower-case every column name, then shorten the names used in later cells.
pvcost = (
    pd.read_csv(
        "PV system Cost.txt",
        sep="\t",
        skiprows=6,
        skipfooter=0,
        index_col=False,
    )
    .rename(columns=str.lower)
    .rename(
        columns={
            "av types": "avtypes",
            "cost ($)": "cost",
            "panel height (ft.)": "pheight",
            "total cost ($)": "tcost",
        }
    )
)

# Quick look at both ends of the table.
print(pvcost.head())
print(pvcost.tail())

Unique values of avtypes variable in pvcost dataframe:

In [None]:
# Distinct system-type labels; these are the values classify_value() matches on.
pvcost["avtypes"].unique()

Create a new variable called "tracker" based on the unique values of avtypes in pvcost shown above, and print a few observations to check that the trackers are classified correctly.

In [None]:
# Define function to classify values
def classify_value(x):
    """Map a system-type label to its mounting class.

    Parameters
    ----------
    x : object
        A value from the ``avtypes`` column. Leading and trailing whitespace
        is ignored, so padded labels such as
        ``"PV + Crops (Tracker Stilt Mount)     "`` match their clean form.

    Returns
    -------
    str
        ``"Fixed"`` or ``"Tracking"`` for a known label, otherwise
        ``"Unclassified"`` (including for non-string input such as NaN).
    """
    # One entry per known label. The previous if/elif chain listed
    # "PV + Crops (Tracker Stilt Mount)" twice; the second branch was
    # unreachable and returned the misspelled value "Trakcing".
    tracker_by_avtype = {
        "Typical Fixed PV": "Fixed",
        "Typical 1-AT PV": "Tracking",
        "Fixed PV + Grazing": "Fixed",
        "Tracker PV + Grazing": "Tracking",
        "Fixed PV + Pollinator": "Fixed",
        "Tracker PV + Pollinator": "Tracking",
        "PV + Crops (Vertical Mount)": "Fixed",
        "PV + Crops (Tracker Stilt Mount)": "Tracking",
        "PV + Crops (Reinforced Regular Mount)": "Fixed",
    }

    # Missing values (NaN/None) and other non-strings cannot be stripped and
    # never matched a label before, so they stay "Unclassified".
    if not isinstance(x, str):
        return "Unclassified"

    return tracker_by_avtype.get(x.strip(), "Unclassified")

# Derive the 'tracker' column by classifying every avtypes label.
pvcost['tracker'] = pvcost['avtypes'].map(classify_value)

# Spot-check rows labelled 8 through 16 (label-based slice, both ends included).
print(pvcost.loc[8:16])

Basic information about the dataframe:

In [None]:
# Display basic information about the DataFrame.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() adds a stray "None" line.
print("Dataset Info:")
pvcost.info()

Descriptive statistics for numerical columns

In [None]:
# Summary statistics of the numeric columns, rounded to two decimals,
# printed under a heading line.
print("Descriptive Statistics:", pvcost.describe().round(2), sep="\n")

Cost per item for the various tracking systems and panel heights.

In [None]:
# Per-item cost, drawn as one panel per tracker (rows) and panel height (columns).
(
    ggplot(
        pvcost,
        aes(x="cost", y="item", color="tracker", group="tracker"),
    )
    + geom_point()
    + geom_line()
    + facet_grid("tracker~pheight")
    + labs(
        title="Cost ($/W) vs Items",
        x="Cost ($/W)",
        y="Items",
    )
)

Total cost of agrivoltaic systems for the various tracking systems and panel heights.

In [None]:
# Total cost (x) against panel height (y), coloured and grouped by tracker; no faceting.
(
    ggplot(
        pvcost,
        aes(x="tcost", y="pheight", color="tracker", group="tracker"),
    )
    + geom_point()
    + geom_line()
    + labs(
        title="Total Cost ($) vs Height (ft.)",
        x="Total Cost ($/W)",
        y="Panel Height (ft.)",
    )
)

There is more than one observation per facet in the charts above. So, let's narrow them down to one observation per facet. To do so, let's summarize the data and generate some information about the result in the following cells.

In [None]:
# Collapse repeated observations: mean cost for every
# (panel height, item, tracker) combination, rounded to two decimals.
avgcost = (
    pvcost
    .groupby(["pheight", "item", "tracker"])["cost"]
    .mean()
    .reset_index()
    .round(2)
)
print(avgcost.head())

In [None]:
# Summary statistics of the averaged table, rounded to two decimals.
print(avgcost.describe().round(decimals=2))

In [None]:
# DataFrame.info() prints its report directly and returns None, so it is not
# wrapped in print() (that would emit an extra "None" line).
avgcost.info()

Here, we have one observation per facet for each item. Let's make a few plots.

In [None]:
# Averaged per-item cost, one panel per tracker (rows) and panel height (columns).
(
    ggplot(
        avgcost,
        aes(x="cost", y="item", color="tracker", group="tracker"),
    )
    + geom_point()
    + geom_line()
    + facet_grid("tracker~pheight")
    + labs(
        title="Cost ($) vs Items",
        x="Cost ($/W)",
        y="Items",
    )
)

In [None]:
# Add up the averaged item costs into one total per (panel height, tracker) pair.
totalcost = (
    avgcost
    .groupby(["pheight", "tracker"])["cost"]
    .sum()
    .reset_index()
    .round(2)
)
print(totalcost)

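Fit a linear regression of cost on pheight and tracker, then predict the cost at pheight = 6.4 and tracker = "Tracking" and append the prediction to the table.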

In [None]:
# Fit cost ~ pheight + tracker with ordinary least squares, predict the cost of
# a tracking system at pheight = 6.4, and append that prediction to `totalcost`.
#
# NOTE: this cell reassigns `totalcost` (the predicted row is appended), so
# re-running it without first re-running the cell that builds `totalcost`
# appends the prediction again and refits the model on it.

feature_cols = ['pheight', 'tracker_Tracking']

# One-hot encode `tracker`. dtype=int keeps the indicator as 0/1 integers so it
# has the same dtype as the value in the appended row; without it, recent
# pandas versions produce a bool column that turns into a mixed object column
# once the integer 1 is concatenated onto it.
totalcost = pd.get_dummies(totalcost, columns=['tracker'], drop_first=True, dtype=int)

# Create and fit the model (instantiated once).
model = LinearRegression()
model.fit(totalcost[feature_cols], totalcost['cost'])

# Predict cost for pheight = 6.4 and tracker = Tracking
pheight_value = 6.4
tracker_value = 1  # 1 for 'Tracking'

# Predict from a DataFrame carrying the training column names: the model was
# fitted on named columns, and a bare nested list triggers scikit-learn's
# "X does not have valid feature names" warning.
new_features = pd.DataFrame([[pheight_value, tracker_value]], columns=feature_cols)
predicted_cost = model.predict(new_features)[0]
print(f"Predicted cost at pheight = {pheight_value} and tracker = 'Tracking': {predicted_cost:.2f}")

# Store the prediction with the same two-decimal rounding as the observed costs.
predicted_cost = predicted_cost.round(2)

# Create a DataFrame for the new row
new_row = pd.DataFrame({'pheight': [pheight_value], 'tracker_Tracking': [tracker_value], 'cost': [predicted_cost]})

# Append the new row to the original DataFrame using pd.concat
totalcost = pd.concat([totalcost, new_row], ignore_index=True)

# Convert back to original format: restore the text label, then drop the indicator.
totalcost['tracker'] = totalcost['tracker_Tracking'].map(lambda flag: 'Tracking' if flag == 1 else 'Fixed')
totalcost = totalcost.drop(columns=['tracker_Tracking'])

print(totalcost)

In [None]:
# Text drawn next to each point: the cost value followed by a single space.
totalcost['label'] = totalcost['cost'].map(lambda value: f"{value} ")

# CAPEX cost against panel height, one coloured line per tracker type,
# with every point annotated by its cost label.
plot = (
    ggplot(
        totalcost,
        aes(x="pheight", y="cost", color="tracker", group="tracker"),
    )
    + geom_point()
    + geom_line()
    + geom_text(aes(label='label'), nudge_x=0.05, nudge_y=0.05, size=6)
    + labs(
        title="CAPEX Cost by Solar Panel Height",
        x="Panel Height (ft.)",
        y="CAPEX Cost ($/W)",
        color="Tracker",
    )
    + scale_x_continuous(limits=(4.5, 8.5))
    + scale_y_continuous(limits=(1.5, 2.5))
    + theme(
        panel_background=element_rect(fill="white"),
        plot_background=element_rect(fill="lightgrey"),
    )
)

# Display the plot
plot.show()