# Analisis Data Pendanaan Startup

Nama: Amir Zuhdi Wibowo<br/>
NIM: 223307033<br/>
Kelas: TI-5B<br/>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the dataset from the CSV file. Rows that pandas cannot parse are
# skipped (on_bad_lines="skip") instead of aborting the whole read.
sample_file_path = "startup_data.csv"
df = pd.read_csv(filepath_or_buffer=sample_file_path, on_bad_lines="skip")

In [None]:
# Funding amount columns.
funding_columns = [
    "No_Stage_Amount", "Seed_Amount", "Series_A_Amount",
    "Series_B_Amount", "Series_C_Amount", "Series_D_Amount",
]

# Strip the "$" and "," symbols from each funding column and convert it to
# a numeric dtype.
#
# Values are cast to str before using the .str accessor: the accessor raises
# AttributeError on a column that is already numeric (e.g. when the CSV
# column held plain numbers, or when this cell is re-run after conversion),
# and it turns non-string cells of a mixed column into NaN.
for col in funding_columns:
    cleaned = df[col].astype(str).str.replace(r"[\$,]", "", regex=True)
    # Anything that still is not a number (including missing values, which
    # become text after the cast) is coerced to NaN.
    df[col] = pd.to_numeric(cleaned, errors="coerce")

In [None]:
# Handle missing values.

# Columns that get the text placeholder.
string_columns = [
    "Description", "Stage", "Market", "Names",
    "No_Stage_Date", "Pitch", "Seed_Date",
    "Series_A_Date", "Series_B_Date", "Series_C_Date", "Series_D_Date",
]

# Columns that get a numeric zero.
numeric_columns = [
    "No_Stage_Amount", "Seed_Amount", "Series_A_Amount",
    "Series_B_Amount", "Series_C_Amount", "Series_D_Amount",
]

# Map every column to its fill value: 'Unknown' for the string columns,
# 0 for the numeric ones. Insertion order keeps the string columns first.
fill_values = {
    **dict.fromkeys(string_columns, "Unknown"),
    **dict.fromkeys(numeric_columns, 0),
}
for column, filler in fill_values.items():
    df[column] = df[column].fillna(filler)

# A Stage value of "-" is relabelled as "Not Passed".
df["Stage"] = df["Stage"].replace({"-": "Not Passed"})

In [None]:
# Compute the total funding of each row: missing amounts are treated as 0
# and the funding columns are summed across the row.
funding_amounts = df[funding_columns].fillna(0)
df["Total Funding"] = funding_amounts.sum(axis="columns")

# Show the data.
print(df)

In [None]:
# Total funding per startup.

# Label every row as "<name> (<row index>)".
row_labels = df.index.astype(str)
df["Unique_Description"] = df["Names"] + " (" + row_labels + ")"

# Sum the total funding for each label.
chart_data = (
    df.groupby("Unique_Description")["Total Funding"]
    .sum()
    .reset_index()
)

# Draw the bar chart.
bar_labels = chart_data["Unique_Description"]
bar_heights = chart_data["Total Funding"]

plt.figure(figsize=(12, 6))
plt.bar(bar_labels, bar_heights)
plt.xlabel("Startup")
plt.ylabel("Total Pendanaan")
plt.title("Total Pendanaan per Startup")
# Tilt the tick labels and right-align them under their bars.
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()

In [None]:
# Funding distribution by stage: sum every funding column within each Stage.
stage_funding = df.groupby("Stage")[funding_columns].sum().reset_index()

# Draw the stacked bar chart on an explicitly created Axes.
# DataFrame.plot opens a figure of its own unless it receives `ax`, so a
# separate plt.figure(figsize=...) call would only leave an extra empty
# figure behind and its size would not apply to the chart.
fig, ax = plt.subplots(figsize=(12, 6))
stage_funding.plot(x="Stage", y=funding_columns, kind="bar", stacked=True, ax=ax)
ax.set_title("Distribusi Pendanaan Berdasarkan Tahapan")
ax.set_xlabel("Stage")
ax.set_ylabel("Total Pendanaan")
# Anchor the legend outside the plotting area, to the right of the bars.
ax.legend(title="Funding Stage", bbox_to_anchor=(1.05, 1), loc="upper left")
fig.tight_layout()
plt.show()

In [None]:
# Function that filters the data by stage.
def filter_by_stage(stage, data=None):
    """Print the rows whose "Stage" column equals *stage*.

    Args:
        stage: Stage label to match exactly (for example ``"Seed"``).
        data: DataFrame to search; it must contain a "Stage" column.
            When omitted, the notebook-level ``df`` is used.

    Returns:
        None. A header line and the matching rows are printed, or a
        "no startup" message when nothing matches.
    """
    frame = df if data is None else data
    matches = frame[frame["Stage"] == stage]

    # Guard clause: nothing matched, report it and stop.
    if matches.empty:
        print(f"Tidak ada startup pada Stage: {stage}")
        return

    print(f"Startup pada Stage: {stage}")
    print(matches)

# Example: show the startups whose stage is 'Seed'.
filter_by_stage(stage="Seed")

In [None]:
# Build a linear regression model.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Columns used for the analysis (adjust to your data).
# NOTE(review): "Total Funding" is computed as the sum of the funding
# columns, which include these three features - confirm that predicting it
# from them is the intended setup.
X = df[["Seed_Amount", "Series_A_Amount", "Series_B_Amount"]]
y = df["Total Funding"]

# Split into training and test data (20% held out, fixed seed so the split
# is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model.
reg = LinearRegression()
reg.fit(X_train, y_train)

# Save the model.
import pickle

filename = 'startup.sav'
# Use a context manager so the file is flushed and closed as soon as the
# dump finishes, instead of leaving an open handle behind.
with open(filename, 'wb') as model_file:
    pickle.dump(reg, model_file)

print("Model berhasil disimpan.")
