In [40]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

In [41]:
def Harmful_for_Satellites(row):
    if (row["wavelength"] < 280 and row["irradiance_in_W/m2"] > 10):
        return 1
    else:
        return 0

In [42]:
def Harmful_for_Astronauts(row):
    if (row["wavelength"] < 320 and row["irradiance_in_W/m2"] > 120):
        return 1
    else:
        return 0

In [43]:
def categorize_wavelength(wavelength):
    if 320 <= wavelength <= 400:
        return 'UVA'
    elif 280 <= wavelength < 320:
        return 'UVB'
    elif 100 <= wavelength < 280:
        return 'UVC'
    elif 400 <= wavelength <= 700:
        return 'Visible'
    elif 700 < wavelength <= 1400:
        return 'NIR'
    elif 1400 < wavelength <= 3000:
        return 'SWIR'
    else:
        return 'Other'

In [44]:
# Reading the raw csv files
raw_data = pd.read_csv("solarcurrent.csv")

In [45]:
# Making new csv file for required columns i.e. neglecting other columns
selected_columns = raw_data[['date','wavelength','irradiance']]
selected_columns.to_csv('required_data.csv', index=False)

In [46]:
# New csv is read and new column is added for irradiance_in_W/m2
required_data = pd.read_csv("required_data.csv")
required_data["irradiance_in_W/m2"] = required_data["wavelength"] * required_data["irradiance"]
required_data.to_csv("required_data.csv", index=False)

In [47]:
# New column is added which displays that the wavelength is in which band
required_data['Band'] = required_data['wavelength'].apply(categorize_wavelength)

In [48]:
# Adding column Harmful_for_Satellites which states 1 if harmful and 0 if not harmful
required_data["Harmful_for_Satellites"] = required_data.apply(Harmful_for_Satellites,axis=1)
required_data.to_csv("required_data.csv", index=False)

In [49]:
# Adding column Harmful_for_Astronauts which states 1 if harmful and 0 if not harmful
required_data["Harmful_for_Astronauts"] = required_data.apply(Harmful_for_Astronauts,axis=1)
required_data.to_csv("required_data.csv", index=False)  

In [50]:
labelencoder = LabelEncoder()
required_data["Encoded_date"] = labelencoder.fit_transform(required_data["date"])
required_data["Encoded_Band"] = labelencoder.fit_transform(required_data["Band"])

In [51]:
# Making input and output feature of the model
X = required_data[['wavelength','irradiance_in_W/m2']]
y_satellite = required_data['Harmful_for_Satellites']
y_astronaut = required_data['Harmful_for_Astronauts']

In [52]:
# Splitting the input and output in train set and test set
X_train_sat, X_test_sat, y_train_sat, y_test_sat = train_test_split(X, y_satellite, test_size=0.2, random_state=42)
X_train_ast, X_test_ast, y_train_ast, y_test_ast = train_test_split(X, y_astronaut, test_size=0.2, random_state=42)

In [53]:
# Training Logistic Regression model for satellites
model_sat = LogisticRegression()
model_sat.fit(X_train_sat, y_train_sat)

In [54]:
# Training Logistic Regression model for astronauts
model_ast = LogisticRegression()
model_ast.fit(X_train_ast, y_train_ast)

In [55]:
# Predicting the output of models using test set
y_pred_sat = model_sat.predict(X_test_sat)
y_pred_ast = model_ast.predict(X_test_ast)

In [56]:
# Evaluating satellite model by comparing predictions and test set
print("Evaluating Satellite Model")
print("Accuracy: ", accuracy_score(y_pred_sat, y_test_sat))
print("Confusion Matrix: \n", confusion_matrix(y_pred_sat, y_test_sat))
print("Classification Report: \n", classification_report(y_pred_sat, y_test_sat))

Evaluating Satellite Model
Accuracy:  0.6928970306788531
Confusion Matrix: 
 [[458350 148185]
 [ 54963      0]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.89      0.76      0.82    606535
           1       0.00      0.00      0.00     54963

    accuracy                           0.69    661498
   macro avg       0.45      0.38      0.41    661498
weighted avg       0.82      0.69      0.75    661498



In [57]:
# Evaluating astronaut model by comparing predictions and test set
print("\nEvaluating Astronaut Model")
print("Accuracy: ", accuracy_score(y_pred_ast, y_test_ast))
print("Confusion Matrix: \n", confusion_matrix(y_pred_ast, y_test_ast))
print("Classification Report: \n", classification_report(y_pred_ast, y_test_ast))


Evaluating Astronaut Model
Accuracy:  0.9298380342797711
Confusion Matrix: 
 [[615086  46412]
 [     0      0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report: 
               precision    recall  f1-score   support

           0       1.00      0.93      0.96    661498
           1       0.00      0.00      0.00         0

    accuracy                           0.93    661498
   macro avg       0.50      0.46      0.48    661498
weighted avg       1.00      0.93      0.96    661498



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [83]:
# Feeding new data to the model and checking the prediction of model
new_data = pd.DataFrame({'wavelength': [200.3, 230], 'irradiance_in_W/m2': [3,200]})

predictions_sat = model_sat.predict(new_data)
print("Predictions of Satellite Model: ", predictions_sat)

predictions_ast = model_ast.predict(new_data)
print("Prediction of Astronaut Model: ",predictions_ast)

Predictions of Satellite Model:  [1 0]
Prediction of Astronaut Model:  [0 0]


In [89]:
print(required_data["Harmful_for_Astronauts"].value_counts())
print(required_data["Harmful_for_Satellites"].value_counts())

Harmful_for_Astronauts
0    3077034
1     230454
Name: count, dtype: int64
Harmful_for_Satellites
0    2568001
1     739487
Name: count, dtype: int64
