# 0. 必要なデータ、モジュールのインストール

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout


import matplotlib
import matplotlib.pyplot as plt
import japanize_matplotlib

font_path = '/usr/share/fonts/opentype/noto/NotoSansCJK-Black.ttc'
ipaex_gothic = matplotlib.font_manager.FontProperties(fname=font_path)

# グラフのフォントに適用
plt.rcParams['font.family'] = ipaex_gothic.get_name()

!wget -O data.zip https://www.dropbox.com/scl/fi/nr1caawmfnd97g1lteokh/data.zip?rlkey=02fow7tv7eoywq1p44wou2xv8&st=h9kq01bu&dl=0
!unzip /content/data.zip

# 学習データの読み込み
df_train = pd.read_csv("train.csv")

# テストデータの読み込み
df_test = pd.read_csv("test.csv")

ModuleNotFoundError: No module named 'sklearn'

# 1. 条件分岐（if文）を用いた予測

## 1-1. 条件の設定

In [None]:
# Sample records (using the renamed column names): one dict per applicant,
# built from compact value rows in column order.
data = [
    dict(zip(("GPA", "Programming", "Internship", "Recruit_score"), row))
    for row in (
        (2.0, 75, 1, 69),
        (1.6, 45, 0, 43),
        (3.2, 97, 1, 64),
        (3.2, 66, 1, 77),
        (3.6, 41, 0, 80),
    )
]

# Ground-truth labels, aligned by position with `data`
labels = [0, 0, 1, 1, 0]

# Define the rule
def predict_if(record):
  # Rule-based classifier: returns 1 when the condition below holds, otherwise 0.
  # `record` is indexed by the string keys "GPA" and "Internship".
  # ----------- Fill in the condition under which "hired" is predicted ------------
  # Each ?? is an exercise placeholder; replace both with concrete values before
  # running this cell (the code does not parse until they are filled in).
  if record["GPA"] >= ?? and record["Internship"] == ??:
    return 1
  return 0

# Evaluate the rule on every sample and show whether it matches the label
for idx, (sample, expected) in enumerate(zip(data, labels), start=1):
    predicted = predict_if(sample)
    mark = '✅' if predicted == expected else '❌'
    print(f"サンプル {idx}: ルールベースによる出力 = {predicted}, 正解 = {expected}, {mark}")

## 1-2. 条件分岐を用いた予測


In [None]:
from sklearn.metrics import accuracy_score

# Ground-truth labels of the test set
y_true = df_test["Label"]
# Apply the hand-written rule to each test row (axis=1 passes one row at a time)
y_pred = df_test.apply(predict_if, axis=1)

# Fraction of test rows where the rule agrees with the label
acc = accuracy_score(y_true, y_pred)
print(f"IF文予測の精度: {acc:.3f}")

# 2. ロジスティック回帰

## 2-1. 学習を行う前のロジスティック回帰（ランダムな重み）

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from scipy.special import expit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# Visualization helper
def plot_network(weights, title, input_names=None):
    """Draw a single-unit model as a small left-to-right directed graph.

    Every input feature becomes a node on the left, connected to one
    intermediate node ("H1") by an edge labelled with its weight. "H1" is
    connected to the output node ("HiringProb") by an unlabelled edge.

    Args:
        weights: Sequence of edge weights, one per input node, in the same
            order as the input-node names.
        title: Text used as the figure title.
        input_names: Optional sequence of input-node names. When omitted,
            the notebook-level ``feature_names`` list is used.

    Raises:
        ValueError: If the number of weights differs from the number of
            input nodes.
    """
    input_nodes = list(feature_names if input_names is None else input_names)
    if len(weights) != len(input_nodes):
        raise ValueError(
            f"expected {len(input_nodes)} weights, got {len(weights)}"
        )
    hidden_node = "H1"
    output_node = "HiringProb"

    G = nx.DiGraph()
    G.add_nodes_from(input_nodes)
    G.add_node(hidden_node)
    G.add_node(output_node)

    for node, weight in zip(input_nodes, weights):
        G.add_edge(node, hidden_node, weight=weight)
    G.add_edge(hidden_node, output_node, weight=1.0)

    # Layout is derived from the node list instead of hard-coded names, so any
    # feature list can be drawn: inputs are stacked top-to-bottom at x=-2, the
    # intermediate and output nodes sit at the vertical midpoint of the inputs.
    top = len(input_nodes) - 1
    mid_y = top / 2
    pos = {node: (-2, top - i) for i, node in enumerate(input_nodes)}
    pos[hidden_node] = (0, mid_y)
    pos[output_node] = (2, mid_y)

    plt.figure(figsize=(10, 6))
    nx.draw_networkx_nodes(G, pos, node_color='skyblue', node_size=2500)
    nx.draw_networkx_labels(G, pos, font_size=10)

    edges = G.edges(data=True)
    nx.draw_networkx_edges(G, pos, edgelist=edges, arrowstyle='-|>', arrowsize=20)

    # Weight labels (the H1 -> output edge is intentionally left unlabelled)
    edge_labels = {
        (u, v): f"{d['weight']:.2f}"
        for u, v, d in edges
        if not (u == hidden_node and v == output_node)
    }
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=9)

    plt.title(title)
    plt.axis('off')
    plt.tight_layout()
    plt.show()


# Feature column names used as model inputs
feature_names = ["GPA", "Programming", "Internship", "Recruit_score"]

# --- Data preparation: features and labels as NumPy arrays ---
X_train, y_train = df_train[feature_names].to_numpy(), df_train["Label"].to_numpy()
X_test, y_test = df_test[feature_names].to_numpy(), df_test["Label"].to_numpy()

# Scaling: statistics are fitted on the training set only, then applied to both
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Initial weights (random, seeded for reproducibility) and zero bias
np.random.seed(42)
w_init = np.random.uniform(0, 1, size=X_train_scaled.shape[1])
b_init = 0.0

# Visualize the model before any training
plot_network(w_init, "ロジスティック回帰（ランダムな重み）")

# === Accuracy before training ===
# Linear score -> sigmoid probability -> class 1 when probability >= 0.5
z_init = X_test_scaled.dot(w_init) + b_init
y_pred_init = (expit(z_init) >= 0.5).astype(int)
acc_init = accuracy_score(y_test, y_pred_init)
print(f"予測精度: {acc_init:.3f}")

## 2-2. ロジスティック回帰の学習と精度の向上

In [None]:
# --- Training: gradient descent using the whole training set at every step ---
w, b = w_init.copy(), b_init  # copy so the initial weights stay untouched
lr, epochs = 0.1, 100
n_samples = len(X_train_scaled)

for _ in range(epochs):
    # Predicted probability minus the true label, per sample
    residual = expit(X_train_scaled.dot(w) + b) - y_train
    # Average gradient w.r.t. the weights, then w.r.t. the bias
    w -= lr * (X_train_scaled.T.dot(residual) / n_samples)
    b -= lr * residual.mean()

# Visualize the model after training
plot_network(w, "ロジスティック回帰（学習後）")

# Accuracy after training (class 1 when the sigmoid output is >= 0.5)
z_test = X_test_scaled.dot(w) + b
y_test_pred = (expit(z_test) >= 0.5).astype(int)
acc_test = accuracy_score(y_test, y_test_pred)

print(f"[学習後] 予測精度: {acc_test:.3f}")

# 3. ディープラーニング

## 3-1. ディープラーニングの学習と予測



In [None]:
import numpy as np
import tensorflow as tf
import random
from tensorflow.keras.utils import set_random_seed
import warnings
warnings.filterwarnings('ignore')

# Fix every random source used below so runs are repeatable
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)
set_random_seed(SEED)

# Feature scaling (fitted on the training set only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the deep-learning model: inputs -> 8 -> 4 -> 1 (sigmoid)
# The input width is taken from the data through an explicit Input object
# instead of a hard-coded `input_dim=4` on the first Dense layer, so the model
# follows the feature list and avoids the legacy `input_dim` argument.
model = Sequential()
model.add(tf.keras.Input(shape=(X_train_scaled.shape[1],)))
model.add(Dense(8, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(4, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

# Stop when validation loss has not improved for 10 epochs (a fairly long
# patience) and roll back to the best weights seen
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Training: a small batch size gives more weight updates per epoch;
# 20% of the training data is held out for validation
model.fit(X_train_scaled, y_train, epochs=200, batch_size=16,
          validation_split=0.2, callbacks=[early_stop], verbose=0)

# Predict on the test data: probability -> class 1 when strictly above 0.5
y_pred_nn = model.predict(X_test_scaled, verbose=0).flatten()
y_pred_nn_class = (y_pred_nn > 0.5).astype(int)
acc_nn = accuracy_score(y_test, y_pred_nn_class)

# Show the result (the summary table is built here but not displayed in this cell)
improved_result = pd.DataFrame({
    "モデル": ["ディープラーニング"],
    "正解率(Accuracy)": [acc_nn]
})
print("ディープラーニングの精度：", acc_nn)

## 3-2. ディープラーニングの重みの可視化

In [None]:
# Fetch the weight matrix and bias vector of each of the three Dense layers:
# input -> hidden 1, hidden 1 -> hidden 2, hidden 2 -> output
(W1, b1), (W2, b2), (W3, b3) = (layer.get_weights() for layer in model.layers[:3])

print("入力層→1層目間の重み：\n", W1)
print("\n1層目→2層目間の重み：\n", W2)
print("\n2層目→出力層の重み：\n", W3)


# Node names per layer (hidden-node counts follow the weight-matrix widths)
input_nodes = ["GPA", "Programming", "Internship", "Recruit_score"]
hidden1_nodes = [f"H1_{k}" for k in range(1, W1.shape[1] + 1)]
hidden2_nodes = [f"H2_{k}" for k in range(1, W2.shape[1] + 1)]
output_node = "HiringProb"

G = nx.DiGraph()

# Add all nodes, layer by layer
G.add_nodes_from(input_nodes + hidden1_nodes + hidden2_nodes + [output_node])

# Add the edges of each consecutive layer pair; entry [i, j] of the matrix is
# the weight from source node i to target node j
for sources, targets, W in (
    (input_nodes, hidden1_nodes, W1),
    (hidden1_nodes, hidden2_nodes, W2),
    (hidden2_nodes, [output_node], W3),
):
    for i, src in enumerate(sources):
        for j, dst in enumerate(targets):
            G.add_edge(src, dst, weight=W[i, j])

# Node placement: one column per layer, nodes stacked top-to-bottom
pos = {node: (-3, len(input_nodes) - i) for i, node in enumerate(input_nodes)}
pos.update(
    (node, (-1, len(hidden1_nodes) / 2 - i)) for i, node in enumerate(hidden1_nodes)
)
pos.update(
    (node, (1, len(hidden2_nodes) / 2 - i)) for i, node in enumerate(hidden2_nodes)
)
pos[output_node] = (3, 0)

# Drawing
plt.figure(figsize=(16, 10))
nx.draw_networkx_nodes(G, pos, node_color='skyblue', node_size=1800)
nx.draw_networkx_labels(G, pos, font_size=10)

edges = G.edges(data=True)
nx.draw_networkx_edges(G, pos, edgelist=edges, arrowstyle='-|>', arrowsize=15)

# Weight labels (weights with absolute value <= 0.1 are left unlabelled)
edge_labels = {}
for u, v, attrs in edges:
    if abs(attrs['weight']) > 0.1:
        edge_labels[(u, v)] = f"{attrs['weight']:.2f}"
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)

plt.title("学習済みニューラルネットの重み", fontsize=14)
plt.axis('off')
plt.tight_layout()
plt.show()