In [None]:


# Mount Google Drive and make sure the project folder exists, so that every
# later cell can write its artefacts there.
import os
from google.colab import drive

drive.mount('/content/drive')

project_folder = '/content/drive/MyDrive/astazi'
os.makedirs(project_folder, exist_ok=True)  # no-op when the folder is already present

for message_parts in (
    ('Project folder:', project_folder),
    ('Please ensure this is the folder where you uploaded the notebook.',),
):
    print(*message_parts)


Mounted at /content/drive
Project folder: /content/drive/MyDrive/astazi
Please ensure this is the folder where you uploaded the notebook.


In [None]:

# Sanity check: report whether the Drive root and the project folder are
# visible from this runtime.
import os

for label, checked_path in (
    ('Exists:', '/content/drive/MyDrive'),
    ('Project folder exists:', '/content/drive/MyDrive/astazi'),
):
    print(label, os.path.exists(checked_path))
print('If project folder does not exist create it in Drive or re-run mount cell.')


Exists: True
Project folder exists: True
If project folder does not exist create it in Drive or re-run mount cell.


In [None]:

import pandas as pd, numpy as np, random, datetime, os

# Build a synthetic launch dataset. Both RNGs are seeded so that re-running the
# cell regenerates exactly the same rows.
random.seed(42); np.random.seed(42)
n = 800
families = ['falcon','atlas','ariane','soyuz','delta','longmarch','electron','vega']
rocket_names = [f'rocket_{i}' for i in range(1,301)]
# Each entry is (site name, latitude, longitude).
launch_sites = [
    ('Cape Canaveral',28.396837,-80.605659),
    ('Vandenberg',34.742, -120.5724),
    ('Baikonur',45.965,63.305),
    ('Kourou',5.236, -52.768),
    ('Jiuquan',40.9606,100.2983),
    ('Plesetsk',62.927,40.472),
    ('Tanegashima',30.375,130.957),
    ('Satish Dhawan',13.7199,80.2304)
]
orbits = ['LEO','GTO','MEO','HEO','SSO','Transfer']

rows = []
start = datetime.date(2000,1,1)
for i in range(1,n+1):
    family = random.choice(families)
    name = random.choice(rocket_names)
    days = random.randint(0, 9000)
    launch_date = start + datetime.timedelta(days=days)
    site, lat, lon = random.choice(launch_sites)
    # Payload is drawn around 5000 for falcon/ariane/delta and around 500 for
    # every other family, with a floor of 50.
    payload = max(50, int(abs(np.random.normal(5000 if family in ['falcon','ariane','delta'] else 500, 2000))))
    orbit = random.choice(orbits)
    # Success probability: a per-family base rate, reduced as payload grows,
    # plus a little noise, clipped to [0.02, 0.99].
    base = 0.9 if family in ['falcon','ariane'] else 0.85 if family in ['atlas','delta'] else 0.75 if family in ['soyuz','longmarch'] else 0.65
    prob = min(max(base - (payload/20000) + np.random.normal(0,0.05), 0.02), 0.99)
    success = int(np.random.rand() < prob)
    # The LEO draw is N(15, 20), so an unclamped sample is routinely zero or
    # negative. Clamp to at least one minute; max() consumes no random numbers,
    # so every other generated column is unchanged.
    flight_time = max(1, int(np.random.normal(15 if orbit=='LEO' else 45 if orbit=='GTO' else 120, 20)))
    cost = round(max(5, np.random.normal(50 if family in ['falcon','ariane'] else 30, 15)),2)
    # lat/lon get a small per-mission jitter around the site coordinates.
    rows.append((i, name, family, launch_date.isoformat(), site, lat + np.random.normal(0,0.02), lon + np.random.normal(0,0.02), payload, orbit, success, flight_time, cost))

df = pd.DataFrame(rows, columns=['mission_id','rocket_name','family','launch_date','launch_site','lat','lon','payload_kg','orbit','success','flight_time_min','cost_million_usd'])

# Inject missing values so the cleaning step has something to do.
# NOTE(review): the sample uses one fixed seed, so the SAME 2% of rows lose all
# four categorical fields at once. That is the original behaviour and is kept;
# use a different random_state per column if independent gaps are wanted.
missing_categorical_idx = df.sample(frac=0.02, random_state=1).index
for col in ['rocket_name','family','launch_site','orbit']:
    df.loc[missing_categorical_idx, col] = None
df.loc[df.sample(frac=0.01, random_state=2).index, 'cost_million_usd'] = None

csv_path = os.path.join('/content/drive/MyDrive/astazi', 'rockets_synthetic.csv')
df.to_csv(csv_path, index=False)
print('Saved CSV to', csv_path)
df.head()


Saved CSV to /content/drive/MyDrive/astazi/rockets_synthetic.csv


Unnamed: 0,mission_id,rocket_name,family,launch_date,launch_site,lat,lon,payload_kg,orbit,success,flight_time_min,cost_million_usd
0,1,rocket_13,atlas,2012-05-03,Kourou,5.241581,-52.74779,1493,GTO,1,22,34.78
1,2,rocket_53,ariane,2024-06-18,Vandenberg,34.707502,-120.583646,3838,SSO,1,124,21.3
2,3,rocket_17,electron,2001-05-03,Vandenberg,34.729411,-120.560446,1525,GTO,1,30,5.0
3,4,rocket_259,soyuz,2001-03-10,Kourou,5.223987,-52.773834,5618,Transfer,1,96,35.64
4,5,rocket_113,electron,2020-02-24,Jiuquan,40.956011,100.306087,703,LEO,1,4,29.92


In [None]:

import matplotlib.pyplot as plt

# Work on a copy so the raw frame `df` stays untouched.
df2 = df.copy()
for c in ['rocket_name','family','launch_site','orbit']:
    # Lower-case through the .str accessor only: it leaves missing entries
    # missing. The earlier astype(str) round-trip turned None/NaN into the
    # strings 'None'/'nan' and mapped only 'none' back, so NaN survived as the
    # literal category 'nan' and a real value spelled "none" was wiped.
    df2[c] = df2[c].str.lower()
df2['launch_date'] = pd.to_datetime(df2['launch_date'])
df2['mission_year'] = df2['launch_date'].dt.year
# Bins are right-closed: (0,500], (500,2000], (2000,5000], (5000,20000].
df2['payload_class'] = pd.cut(df2['payload_kg'], bins=[0,500,2000,5000,20000], labels=['small','medium','large','heavy'])
# Missing costs are imputed with the median of the observed costs.
df2['cost_million_usd'] = df2['cost_million_usd'].fillna(df2['cost_million_usd'].median())
# Cost is stored in millions of USD, hence the 1e6 factor. The +1 in the
# denominator is kept from the original formula.
df2['cost_per_kg'] = df2['cost_million_usd']*1e6 / (df2['payload_kg']+1)

clean_csv = os.path.join('/content/drive/MyDrive/astazi', 'rockets_synthetic_cleaned.csv')
df2.to_csv(clean_csv, index=False)
print('Saved cleaned CSV to', clean_csv)

# Render the first rows as a matplotlib table and save it as an image.
plt.figure(figsize=(10,2.2))
plt.axis('off')
head = df2.head(8)
table = plt.table(cellText=head.values, colLabels=head.columns, loc='center')
table.auto_set_font_size(False); table.set_fontsize(8)
plt.tight_layout()
dataset_head_path = os.path.join('/content/drive/MyDrive/astazi','dataset_head.png')
plt.savefig(dataset_head_path, dpi=150, bbox_inches='tight')
plt.close()
print('Saved dataset_head.png to', dataset_head_path)


Saved cleaned CSV to /content/drive/MyDrive/astazi/rockets_synthetic_cleaned.csv
Saved dataset_head.png to /content/drive/MyDrive/astazi/dataset_head.png


In [None]:

# Persist a copy of the cleaning snippet as plain text; the snippet itself is
# only written out here, not executed.
cleaning_code = '''# Cleaning snippet to screenshot in Colab
for c in ['rocket_name','family','launch_site','orbit']:
    df2[c] = df2[c].astype(str).str.lower().replace('none', pd.NA)
df2['launch_date'] = pd.to_datetime(df2['launch_date'])
df2['mission_year'] = df2['launch_date'].dt.year
df2['payload_class'] = pd.cut(df2['payload_kg'], bins=[0,500,2000,5000,20000], labels=['small','medium','large','heavy'])
df2['cost_million_usd'] = df2['cost_million_usd'].fillna(df2['cost_million_usd'].median())
'''
cleaning_txt_path = '/content/drive/MyDrive/astazi/cleaning_code.txt'
cleaning_png_path = '/content/drive/MyDrive/astazi/cleaning_code.png'

with open(cleaning_txt_path, 'w') as snippet_file:
    snippet_file.write(cleaning_code)

print('Wrote cleaning_code.txt to', cleaning_txt_path)
print('Instruction: take screenshot of the cleaning code cell and save as', cleaning_png_path)


Wrote cleaning_code.txt to /content/drive/MyDrive/astazi/cleaning_code.txt
Instruction: take screenshot of the cleaning code cell and save as /content/drive/MyDrive/astazi/cleaning_code.png


In [None]:

import seaborn as sns

# --- Histogram of payload mass -------------------------------------------
fig_hist, ax_hist = plt.subplots(figsize=(8,4))
ax_hist.hist(df2['payload_kg'], bins=30)
ax_hist.set_title('Payload distribution (kg)')
ax_hist.set_xlabel('payload_kg')
ax_hist.set_ylabel('count')
hist_path = '/content/drive/MyDrive/astazi/eda_histogram_1.png'
fig_hist.savefig(hist_path, dpi=150, bbox_inches='tight')
plt.close(fig_hist)
print('Saved', hist_path)


# --- Cost per kg by family (rows without a family are left out) -----------
fig_box, ax_box = plt.subplots(figsize=(10,4))
with_family = df2[df2['family'].notna()]
sns.boxplot(x='family', y='cost_per_kg', data=with_family, ax=ax_box)
ax_box.set_title('Cost per kg by family (boxplot)')
boxplot_path = '/content/drive/MyDrive/astazi/eda_boxplot.png'
fig_box.savefig(boxplot_path, dpi=150, bbox_inches='tight')
plt.close(fig_box)
print('Saved', boxplot_path)


# --- Correlation heatmap over every numeric column ------------------------
fig_corr, ax_corr = plt.subplots(figsize=(6,5))
numeric_only = df2.select_dtypes(include=[np.number])
corr = numeric_only.corr()
sns.heatmap(corr, annot=True, fmt='.2f', ax=ax_corr)
heatmap_path = '/content/drive/MyDrive/astazi/correlation_heatmap.png'
fig_corr.savefig(heatmap_path, dpi=150, bbox_inches='tight')
plt.close(fig_corr)
print('Saved', heatmap_path)


Saved /content/drive/MyDrive/astazi/eda_histogram_1.png
Saved /content/drive/MyDrive/astazi/eda_boxplot.png
Saved /content/drive/MyDrive/astazi/correlation_heatmap.png


In [None]:

import sqlite3

# Per-family aggregate: mission count, mean payload and mean cost.
query1 = '''
SELECT family, COUNT(*) as cnt, ROUND(AVG(payload_kg),1) as avg_payload, ROUND(AVG(cost_million_usd),2) as avg_cost
FROM missions
GROUP BY family
ORDER BY cnt DESC;
'''

# Load the cleaned frame into a SQLite table and run the query. The connection
# is closed in `finally` so the database file on Drive is released even when
# the export or the query fails; it was previously left open.
conn = sqlite3.connect('/content/drive/MyDrive/astazi/rockets.db')
try:
    df2.to_sql('missions', conn, if_exists='replace', index=False)
    res1 = pd.read_sql_query(query1, conn)
finally:
    conn.close()

with open('/content/drive/MyDrive/astazi/sql_query_1.txt','w') as f:
    f.write(query1.strip())

# Render the query result as a table image.
plt.figure(figsize=(8,2.2)); plt.axis('off')
table = plt.table(cellText=res1.values, colLabels=res1.columns, loc='center')
table.auto_set_font_size(False); table.set_fontsize(9); plt.tight_layout()
res1_path = '/content/drive/MyDrive/astazi/sql_results_table.png'
plt.savefig(res1_path, dpi=150, bbox_inches='tight'); plt.close()
print('Saved SQL results to', res1_path)
print('Saved SQL query to /content/drive/MyDrive/astazi/sql_query_1.txt')


Saved SQL results to /content/drive/MyDrive/astazi/sql_results_table.png
Saved SQL query to /content/drive/MyDrive/astazi/sql_query_1.txt


In [None]:

import folium

# World map with one marker per launch site. Each site's coordinates come from
# the first row of its group.
m = folium.Map(location=[20,0], zoom_start=2)

first_per_site = df2.groupby('launch_site').first().reset_index()
sites = first_per_site[['launch_site','lat','lon']]
for site_name, site_lat, site_lon in zip(sites['launch_site'], sites['lat'], sites['lon']):
    marker = folium.Marker(location=[site_lat, site_lon], popup=str(site_name))
    marker.add_to(m)

map_path = '/content/drive/MyDrive/astazi/rockets_map.html'
m.save(map_path)
print('Saved Folium map to', map_path)
print('Instruction: Open this HTML from Drive and take screenshot as /content/drive/MyDrive/astazi/folium_map.png')


Saved Folium map to /content/drive/MyDrive/astazi/rockets_map.html
Instruction: Open this HTML from Drive and take screenshot as /content/drive/MyDrive/astazi/folium_map.png


In [None]:


!pip install jupyter-dash --quiet
from jupyter_dash import JupyterDash
import plotly.express as px
from dash import html, dcc
from dash.dependencies import Input, Output

app = JupyterDash(__name__)
fig = px.scatter(df2.sample(300, random_state=2), x='payload_kg', y='cost_per_kg', color='success', hover_data=['family','orbit'])
app.layout = html.Div([html.H3('Rockets: payload vs cost_per_kg'), dcc.Graph(id='scatter', figure=fig),
                       dcc.Slider(id='year_slider', min=int(df2['mission_year'].min()), max=int(df2['mission_year'].max()), value=int(df2['mission_year'].max()), step=1)])

@app.callback(Output('scatter','figure'), [Input('year_slider','value')])
def filter_year(y):
    dff = df2[df2['mission_year']<=y].sample(300, random_state=2)
    fig = px.scatter(dff, x='payload_kg', y='cost_per_kg', color='success', hover_data=['family','orbit'])
    return fig

print('To run the dashboard in Colab: run app.run_server(mode="external") then open the URL and take screenshot saved as /content/drive/MyDrive/astazi/dash_overview.png')


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/7.9 MB[0m [31m53.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.9/7.9 MB[0m [31m124.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m81.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m1.5/1.6 MB[0m [31m159.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m1.5/1.6 MB[0m [31m23.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.6 MB[0m [31m17.4 MB/s[0m eta [36m0:00:01


JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



To run the dashboard in Colab: run app.run_server(mode="external") then open the URL and take screenshot saved as /content/drive/MyDrive/astazi/dash_overview.png


In [None]:

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# ---- Feature matrix ------------------------------------------------------
dfm = df2.copy()

# Columns that never become features: identifiers, the raw date, the
# coordinates and the target itself.
drop_cols = ['mission_id','rocket_name','launch_date','lat','lon','success']

# Every remaining object/category column is one-hot encoded.
categorical_cols = [
    c
    for c in dfm.select_dtypes(include=['object', 'category']).columns.tolist()
    if c not in drop_cols
]
dfm = pd.get_dummies(dfm, columns=categorical_cols, drop_first=True)

y = dfm['success']
X = dfm.drop(columns=drop_cols)

# ---- Train / test split and model fit ------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

clf = RandomForestClassifier(n_estimators=150, random_state=42)
clf.fit(X_train, y_train)

summary_text = f'RandomForest training\nn_train={len(X_train)}, n_test={len(X_test)}, features={X.shape[1]}'
with open('/content/drive/MyDrive/astazi/model_training_summary.txt','w') as summary_file:
    summary_file.write(summary_text)
print('Saved model_training_summary.txt to /content/drive/MyDrive/astazi/')

# ---- Predictions on the held-out set -------------------------------------
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)[:,1]  # second probability column

# ---- Confusion matrix ----------------------------------------------------
cm = confusion_matrix(y_test, y_pred)
fig_cm, ax_cm = plt.subplots(figsize=(4,3))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm)
ax_cm.set_title('Confusion Matrix')
ax_cm.set_xlabel('predicted')
ax_cm.set_ylabel('actual')
conf_path = '/content/drive/MyDrive/astazi/confusion_matrix.png'
fig_cm.savefig(conf_path, dpi=150, bbox_inches='tight')
plt.close(fig_cm)
print('Saved', conf_path)

# ---- ROC curve -----------------------------------------------------------
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)
fig_roc, ax_roc = plt.subplots(figsize=(5,4))
ax_roc.plot(fpr, tpr, label=f'AUC={roc_auc:.3f}')
ax_roc.plot([0,1],[0,1],'--')  # diagonal reference line
ax_roc.legend()
ax_roc.set_title('ROC curve')
ax_roc.set_xlabel('FPR')
ax_roc.set_ylabel('TPR')
roc_path = '/content/drive/MyDrive/astazi/roc_curve.png'
fig_roc.savefig(roc_path, dpi=150, bbox_inches='tight')
plt.close(fig_roc)
print('Saved', roc_path)

# ---- Feature importances (largest 20) ------------------------------------
importances = pd.Series(clf.feature_importances_, index=X.columns)
fi = importances.sort_values(ascending=False).head(20)
fig_fi, ax_fi = plt.subplots(figsize=(8,4))
fi.plot(kind='bar', ax=ax_fi)
ax_fi.set_title('Top 20 feature importances')
fig_fi.tight_layout()
fi_path = '/content/drive/MyDrive/astazi/feature_importances.png'
fig_fi.savefig(fi_path, dpi=150, bbox_inches='tight')
plt.close(fig_fi)
print('Saved', fi_path)


Saved model_training_summary.txt to /content/drive/MyDrive/astazi/
Saved /content/drive/MyDrive/astazi/confusion_matrix.png
Saved /content/drive/MyDrive/astazi/roc_curve.png
Saved /content/drive/MyDrive/astazi/feature_importances.png


In [None]:


# Print the contents of the project folder, one entry per line, in sorted order.
project_files = sorted(os.listdir('/content/drive/MyDrive/astazi'))
for entry in project_files:
    print('-', entry)



All done. List of files in project folder:
- Rockets_Project_Generated_astazi.ipynb
- cleaning_code.txt
- confusion_matrix.png
- correlation_heatmap.png
- dataset_head.png
- eda_boxplot.png
- eda_histogram_1.png
- feature_importances.png
- model_training_summary.txt
- roc_curve.png
- rockets.db
- rockets_map.html
- rockets_synthetic.csv
- rockets_synthetic_cleaned.csv
- sql_query_1.txt
- sql_results_table.png

Remember to create these screenshots manually and save them into the same folder with these exact filenames:
- cleaning_code.png
- sql_query_1.png
- folium_map.png
- dash_overview.png
- model_training_summary.png
