<a href="https://colab.research.google.com/github/edanursahiin/Project-of-DataScience-Course/blob/main/412Project_CodeOnly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load the raw dataset from the CSV file; the first row holds the column names
# and fields are comma-separated.
project_data <- read.csv(
  file = "sample_data/data.csv",
  header = TRUE,
  sep = ","
)

In [None]:
# Give the amount column a short name ("INVESTMENT") so the rest of the
# analysis can refer to it easily.
names(project_data)[names(project_data) == "AMOUNT..2020.USD.MILLION."] <- "INVESTMENT"

In [None]:
# Replace missing (NA) and negative values with 0 in every numeric column of
# project_data, so later sums and plots work on non-negative numbers.
#
# Only numeric columns are touched: comparing a character column with `< 0`
# is a lexical string comparison, and writing 0 into it would turn NA or
# empty labels into the text "0". Non-numeric columns are returned unchanged.
project_data <- data.frame(lapply(project_data, function(column) {
  if (is.numeric(column)) {
    # is.na() is checked first so NA entries yield TRUE rather than NA here.
    column[is.na(column) | column < 0] <- 0
  }
  column
}))

In [None]:
# Rewrite the label "Non-renewables" as "Non_renewables" in the CATEGORY
# column (hyphen replaced by underscore).
project_data[["CATEGORY"]] <- gsub(
  pattern = "Non-renewables",
  replacement = "Non_renewables",
  x = project_data[["CATEGORY"]]
)

In [None]:
# Convert the three categorical columns to factors and, after each
# conversion, show how many rows fall into every level.

# Energy category
project_data[["CATEGORY"]] <- factor(project_data[["CATEGORY"]])
table(project_data[["CATEGORY"]])

# Geographic region
project_data[["REGION"]] <- factor(project_data[["REGION"]])
table(project_data[["REGION"]])

# Energy technology
project_data[["TECHNOLOGY"]] <- factor(project_data[["TECHNOLOGY"]])
table(project_data[["TECHNOLOGY"]])

In [None]:
# Inspect the first rows to check that the data looks as expected.
head(x = project_data)

In [None]:
# Inspect the last rows to check that the data looks as expected.
tail(x = project_data)

In [None]:
# Review the dataset again: per-column summary statistics, then the
# structure (column types and a preview of the values).
summary(object = project_data)
str(object = project_data)

In [None]:
# Install the packages needed for data manipulation, plotting and piping.
install.packages(c("dplyr", "ggplot2", "magrittr"))

In [None]:
# Attach the packages used by the plotting and aggregation cells below.
library("dplyr")
library("ggplot2")
library("magrittr")

In [None]:
# Pie chart of total investment per technology.
# geom_bar() counts rows by default, which would show the number of records
# rather than the invested amount. Mapping INVESTMENT to the `weight`
# aesthetic makes each slice the summed investment of its technology, as the
# title states.
ggplot(project_data, aes(x = "", fill = TECHNOLOGY, weight = INVESTMENT)) +
  geom_bar(width = 1, color = "white") +
  # Polar coordinates on the y axis turn the single stacked bar into a pie.
  coord_polar("y", start = 0) +
  xlab("") +
  ylab("") +
  ggtitle("Investment by Technology")

In [None]:
# Line plot of total investment per year.
# The data holds many rows per year; drawing them directly makes geom_line()
# connect every individual record and produces a vertical zig-zag. Summing
# INVESTMENT per YEAR first gives one point per year and a readable trend.
ggplot(
  aggregate(INVESTMENT ~ YEAR, data = project_data, FUN = sum),
  aes(x = YEAR, y = INVESTMENT, group = 1)
) +
  geom_line() +
  xlab("Year") +
  ylab("Investment Amount") +
  ggtitle("Investment Trend over Time")

In [None]:
# Stacked area plot of investment by region over time.
# INVESTMENT is summed per YEAR and REGION before plotting so that each
# region contributes one value per year to the stack, instead of stacking
# every individual record.
ggplot(
  aggregate(INVESTMENT ~ YEAR + REGION, data = project_data, FUN = sum),
  aes(x = YEAR, y = INVESTMENT, fill = REGION)
) +
  geom_area() +
  xlab("Year") +
  ylab("Cumulative Investment Amount") +
  ggtitle("Cumulative Investment by Region over Time")

In [None]:
# Which regions invested the most in energy technologies?
# Sum INVESTMENT per REGION, keep the seven largest totals and draw them as a
# bar chart. TRUE is spelled out (instead of T) because T is an ordinary
# variable that can be reassigned.
project_data %>%
  aggregate(INVESTMENT ~ REGION, data = ., FUN = sum, na.rm = TRUE) %>%
  .[order(.$INVESTMENT, decreasing = TRUE), ] %>%
  head(n = 7) %>%
  ggplot(aes(x = REGION, y = INVESTMENT)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = 45)) +
  xlab("REGIONS") +
  ylab("Investment amount (in million USD)") +
  ggtitle("Regions invested in energies")

In [None]:
# Compare how many records belong to each energy category
# (Renewables vs Non_renewables).

# Stack the rows of the two categories, Renewables first.
combined_data <- do.call(
  rbind,
  lapply(
    c("Renewables", "Non_renewables"),
    function(category_name) project_data[project_data$CATEGORY == category_name, ]
  )
)

# One bar per category; bar height is the number of rows.
ggplot(combined_data, aes(x = CATEGORY, fill = CATEGORY)) +
  geom_bar() +
  ggtitle("Renewable vs Non_renewable Category Chart") +
  xlab("Category") +
  ylab("Count")

In [None]:
# Preparation for the per-technology trend chart: find the seven technologies
# with the largest total investment.
# The result, technology_investment, is the TECHNOLOGY column of the top seven
# rows after sorting the totals in decreasing order. TRUE is spelled out
# (instead of T) because T is an ordinary variable that can be reassigned.
technology_investment <- project_data %>%
  aggregate(INVESTMENT ~ TECHNOLOGY, data = ., FUN = sum, na.rm = TRUE) %>%
  .[order(.$INVESTMENT, decreasing = TRUE), ] %>%
  head(n = 7) %>%
  .$TECHNOLOGY

In [None]:
# Yearly investment for each technology listed in technology_investment,
# drawn as one panel per technology with a log10 y axis.
# TRUE is spelled out (instead of T) because T can be reassigned.
project_data %>%
  aggregate(INVESTMENT ~ YEAR + TECHNOLOGY, data = ., FUN = sum, na.rm = TRUE) %>%
  .[.$TECHNOLOGY %in% technology_investment, ] %>%
  ggplot(aes(x = YEAR, y = INVESTMENT, group = 1)) +
  geom_line(colour = "red") +
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("YEAR") +
  ylab("INVESTMENT") +
  scale_y_log10() +
  ggtitle("Change in investment over years in technologies") +
  facet_wrap("TECHNOLOGY")

In [None]:
# Install and attach neuralnet, which provides the neuralnet() trainer.
install.packages(pkgs = "neuralnet")
library("neuralnet")

In [None]:
# Install and attach caret, used for createDataPartition().
install.packages(pkgs = "caret")
library("caret")

In [None]:
# Most columns are categorical, so every modelling column is coerced to
# numeric before building the neural network. For the factor columns
# (TECHNOLOGY, CATEGORY, REGION) as.numeric() returns the integer level
# codes, not the original labels.
project_data <- transform(
  project_data,
  INVESTMENT = as.numeric(INVESTMENT),
  YEAR = as.numeric(YEAR),
  TECHNOLOGY = as.numeric(TECHNOLOGY),
  CATEGORY = as.numeric(CATEGORY),
  REGION = as.numeric(REGION)
)

In [None]:
# Confirm the column types after the numeric conversion.
str(object = project_data)

In [None]:
# Summary statistics of the numeric-encoded dataset.
summary(object = project_data)

In [None]:
# Fix the random seed so the partition below is reproducible.
set.seed(seed = 123)

# Draw one partition holding 80% of the rows (sampled on INVESTMENT) for
# training; the remaining 20% become the test set.
trainIndex <- createDataPartition(
  y = project_data$INVESTMENT,
  times = 1,
  p = 0.8,
  list = FALSE
)
train_data <- project_data[trainIndex, ]
test_data <- project_data[-trainIndex, ]

In [None]:
# Train the neural network on the training partition.
# The model predicts INVESTMENT from REGION, CATEGORY and TECHNOLOGY using two
# hidden layers of 5 and 3 units. It must be fitted on train_data: fitting on
# test_data would evaluate the model on the rows it was trained on and leave
# the training partition unused.
neural_model <- neuralnet(
  INVESTMENT ~ REGION + CATEGORY + TECHNOLOGY,
  data = train_data,
  hidden = c(5, 3)
)

In [None]:
# Predict INVESTMENT for the rows of test_data with the fitted network.
predictions <- predict(object = neural_model, newdata = test_data)

# Mean squared error between predicted and observed INVESTMENT.
mse <- mean((predictions - test_data$INVESTMENT)^2)
mse

In [None]:
# Draw the network diagram for the first training repetition.
plot(x = neural_model, rep = 1)

In [None]:
# Print the fitted neuralnet object.
print(x = neural_model)

In [None]:
# Install and attach nnet, which provides the nnet() trainer.
install.packages(pkgs = "nnet")
library("nnet")

In [None]:
# Create and train a neural network model on the whole dataset, i.e. on the
# training and test partitions together.

# Combine the train and test data for training the model on the entire dataset
full_data <- rbind(train_data, test_data)

# Create a neural network model using the full dataset.
# INVESTMENT is regressed on all other columns with one hidden layer of 5
# units and at most 100 iterations. linout = TRUE requests a linear output
# unit: INVESTMENT is a continuous amount, while nnet's default logistic
# output cannot produce values outside (0, 1).
neural_model_full <- nnet(
  INVESTMENT ~ .,
  data = full_data,
  size = 5,
  maxit = 100,
  linout = TRUE
)

In [None]:
# Predict INVESTMENT for a new, hand-built record with the trained network.

# A single new observation; the values are numeric codes in the same
# encoding as the converted project_data columns.
new_data <- data.frame(REGION = 4, TECHNOLOGY = 4, YEAR = 2024, CATEGORY = 2)

# NOTE(review): the prediction uses neural_model (REGION + CATEGORY +
# TECHNOLOGY), so YEAR is not used here; neural_model_full is never used for
# prediction - confirm which model is intended.
new_predictions <- predict(object = neural_model, newdata = new_data)
new_predictions