In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 加载数据集
data = load_breast_cancer()
X = data.data  # 特征矩阵
y = data.target  # 目标变量

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 标准化特征
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(f"训练集样本数: {X_train.shape[0]}, 测试集样本数: {X_test.shape[0]}")
print(f"特征数: {X_train.shape[1]}")


训练集样本数: 455, 测试集样本数: 114
特征数: 30


In [3]:
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [4]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# 初始化并训练Logistic回归模型
model = LogisticRegression(random_state=42, max_iter=1000)
model.fit(X_train, y_train)

# 模型评估
y_pred = model.predict(X_test)
print(f"测试集准确率: {accuracy_score(y_test, y_pred):.4f}")
print(classification_report(y_test, y_pred))


测试集准确率: 0.9737
              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



# coordinate descend


$j = argmin_{j} (y^T X e_j - sigmoid(X\beta)^T X e_j)$ \\

$e_{j}$是第j个元素为1. \\

$\Delta \beta_{k} = 2*[y^T X e_j - sigmoid(X\beta)^T X e_j]$ \\

$\beta_{k+1} = \beta_{k}+ \Delta \beta_{k}$ \\

直到收敛

In [11]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.special import expit as sigmoid  # Sigmoid function

def coordinate_gradient_regression(X, y, max_iter=100, tol=1e-6):
    """
    Coordinate gradient regression algorithm.

    Parameters:
    X : ndarray
        Feature matrix.
    y : ndarray
        Target vector (binary classification).
    max_iter : int
        Maximum number of iterations.
    tol : float
        Convergence tolerance.

    Returns:
    beta : ndarray
        Coefficients after convergence.
    """
    # Initialize beta
    n_samples, n_features = X.shape
    beta = np.zeros(n_features)

    for iteration in range(max_iter):
        beta_old = beta.copy()

        for j in range(n_features):
            # Compute the gradient for the j-th coordinate
            e_j = np.zeros(n_features)
            e_j[j] = 1

            gradient_j = y.T @ (X @ e_j) - sigmoid(X @ beta).T @ (X @ e_j)

            delta_beta_j = 4 * gradient_j

            # Update beta[j]
            beta[j] += delta_beta_j

        # Check for convergence
        if np.linalg.norm(beta - beta_old, ord=2) < tol:
            print(f"Converged in {iteration + 1} iterations.")
            break

    return beta

# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Run the coordinate gradient regression algorithm
beta = coordinate_gradient_regression(X_train, y_train)

# Make predictions on the test set using coordinate gradient regression
y_pred = sigmoid(X_test @ beta) >= 0.5

# Calculate accuracy on the test set using coordinate gradient regression
accuracy_cg = accuracy_score(y_test, y_pred)

print("Learned coefficients (Coordinate Gradient Regression):", beta)
print(f"Test set accuracy (Coordinate Gradient Regression): {accuracy_cg:.4f}")

# Run Logistic Regression using scikit-learn
logistic_model = LogisticRegression(max_iter=100, random_state=42)
logistic_model.fit(X_train, y_train)

# Make predictions on the test set using Logistic Regression
y_pred_lr = logistic_model.predict(X_test)

# Calculate accuracy on the test set using Logistic Regression
accuracy_lr = accuracy_score(y_test, y_pred_lr)

print(f"Test set accuracy (Logistic Regression): {accuracy_lr:.4f}")


Learned coefficients (Coordinate Gradient Regression): [-398.01059521  -69.24884864  205.27129301  115.87819423   -2.25816016
  196.77755559  -70.60900929 -110.09622813   11.17392483  -89.32488687
  -82.89256793   16.43362329   10.36745515  -62.53559611   -7.26255034
  -18.27370985   99.12839139   -2.41986336   53.41931609  -25.84042386
  -20.68196741   -6.84671339   50.46675403    6.75435405  -25.89114871
  -92.89244128  -89.66259194  -38.66202799  -86.75327015  123.83007271]
Test set accuracy (Coordinate Gradient Regression): 0.9386
Test set accuracy (Logistic Regression): 0.9737


In [34]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.special import expit as sigmoid  # Sigmoid function

def coordinate_gradient_regression(X, y, max_iter=1000, tol=1e-6):
    """
    Coordinate gradient regression algorithm.

    Parameters:
    X : ndarray
        Feature matrix.
    y : ndarray
        Target vector (binary classification).
    max_iter : int
        Maximum number of iterations.
    tol : float
        Convergence tolerance.

    Returns:
    beta : ndarray
        Coefficients after convergence.
    """
    # Initialize beta
    n_samples, n_features = X.shape
    beta = np.zeros(n_features)

    for iteration in range(max_iter):
        beta_old = beta.copy()

        # Compute gradients for all coordinates
        gradients = []
        for j in range(n_features):
            e_j = np.zeros(n_features)
            e_j[j] = 1

            gradient_j = y.T @ (X @ e_j) - sigmoid(X @ beta).T @ (X @ e_j)
            gradients.append(gradient_j)

        # Find the index of the maximum gradient
        j_max = np.argmax(np.abs(gradients))

        # Update only the coordinate with the largest gradient
        delta_beta_j = (1/0.005) * gradients[j_max]
        beta[j_max] += delta_beta_j

        # Check for convergence
        if np.linalg.norm(beta - beta_old, ord=2) < tol:
            print(f"Converged in {iteration + 1} iterations.")
            break
    print(f"end as :{np.linalg.norm(beta - beta_old, ord=2)}")
    return beta

# Load the breast cancer dataset
# data = load_breast_cancer()
# X = data.data
# y = data.target

from sklearn.datasets import make_classification

X, y = make_classification(
    n_samples=100,
    n_features=1000,
    n_informative=5,
    n_redundant=5,
    n_classes=2,
    flip_y=0.2,  # Add label noise
    random_state=42
)


# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Run the coordinate gradient regression algorithm
beta = coordinate_gradient_regression(X_train, y_train)

# Make predictions on the test set using coordinate gradient regression
y_pred = sigmoid(X_test @ beta) >= 0.5

# Calculate accuracy on the test set using coordinate gradient regression
accuracy_cg = accuracy_score(y_test, y_pred)

print("Learned coefficients (Coordinate Gradient Regression):", beta)
print(f"Test set accuracy (Coordinate Gradient Regression): {accuracy_cg:.4f}")

# Run Logistic Regression using scikit-learn
logistic_model = LogisticRegression(max_iter=1000, random_state=42)
logistic_model.fit(X_train, y_train)

# Make predictions on the test set using Logistic Regression
y_pred_lr = logistic_model.predict(X_test)

# Calculate accuracy on the test set using Logistic Regression
accuracy_lr = accuracy_score(y_test, y_pred_lr)

print(f"Test set accuracy (Logistic Regression): {accuracy_lr:.4f}")


end as :4396.087303273384
Learned coefficients (Coordinate Gradient Regression): [   0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.         3045.5078994     0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.            0.            0.
    0.            0.            0.   

In [25]:
X

array([[-0.17186341,  0.59624885],
       [ 1.25328273, -0.26541353],
       [ 0.72322405,  0.2319425 ],
       ...,
       [ 1.77095705, -0.50943619],
       [-1.06177158,  0.006786  ],
       [ 0.76117231,  0.65196041]])

In [26]:
y

array([1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0,
       1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,

In [280]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.special import expit as sigmoid  # Sigmoid function

def coordinate_gradient_regression(X, y, max_iter=100, tol=1e-6):
    """
    Coordinate gradient regression algorithm.

    Parameters:
    X : ndarray
        Feature matrix.
    y : ndarray
        Target vector (binary classification).
    max_iter : int
        Maximum number of iterations.
    tol : float
        Convergence tolerance.

    Returns:
    beta : ndarray
        Coefficients after convergence.
    """
    # Initialize beta
    n_samples, n_features = X.shape
    beta = np.zeros(n_features)

    for iteration in range(max_iter):
        beta_old = beta.copy()

        for j in range(n_features):
            # Compute the gradient for the j-th coordinate
            e_j = np.zeros(n_features)
            e_j[j] = 1

            gradient_j = y.T @ (X @ e_j) - sigmoid(X @ beta).T @ (X @ e_j)

            delta_beta_j = 4 * gradient_j

            # Update beta[j]
            beta[j] += delta_beta_j

        # Check for convergence
        if np.linalg.norm(beta - beta_old, ord=2) < tol:
            print(f"Converged in {iteration + 1} iterations.")
            break

    return beta

# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Run the coordinate gradient regression algorithm
beta = coordinate_gradient_regression(X_train, y_train)

# Make predictions on the test set using coordinate gradient regression
y_pred = sigmoid(X_test @ beta) >= 0.5

# Calculate accuracy on the test set using coordinate gradient regression
accuracy_cg = accuracy_score(y_test, y_pred)

print("Learned coefficients (Coordinate Gradient Regression):", beta)
print(f"Test set accuracy (Coordinate Gradient Regression): {accuracy_cg:.4f}")

# Run Logistic Regression using scikit-learn
logistic_model = LogisticRegression(max_iter=100, random_state=42)
logistic_model.fit(X_train, y_train)

# Make predictions on the test set using Logistic Regression
y_pred_lr = logistic_model.predict(X_test)

# Calculate accuracy on the test set using Logistic Regression
accuracy_lr = accuracy_score(y_test, y_pred_lr)

print(f"Test set accuracy (Logistic Regression): {accuracy_lr:.4f}")


Learned coefficients (Coordinate Gradient Regression): [-398.01059521  -69.24884864  205.27129301  115.87819423   -2.25816016
  196.77755559  -70.60900929 -110.09622813   11.17392483  -89.32488687
  -82.89256793   16.43362329   10.36745515  -62.53559611   -7.26255034
  -18.27370985   99.12839139   -2.41986336   53.41931609  -25.84042386
  -20.68196741   -6.84671339   50.46675403    6.75435405  -25.89114871
  -92.89244128  -89.66259194  -38.66202799  -86.75327015  123.83007271]
Test set accuracy (Coordinate Gradient Regression): 0.9386
Test set accuracy (Logistic Regression): 0.9737


In [38]:
import pandas as pd
import numpy as np

# 读取数据文件
file_path = "/content/drive/My Drive/Colab Notebooks/Logistic_regression_coordinate/australian.txt"

# 初始化存储
X = []
y = []

# 解析文件内容
with open(file_path, 'r') as f:
    for line in f:
        parts = line.strip().split()
        y.append(int(parts[0]))  # 提取目标变量 y
        features = {int(kv.split(':')[0]): float(kv.split(':')[1]) for kv in parts[1:]}  # 提取特征
        X.append(features)

# 创建 Pandas DataFrame 并填充缺失值为 0
X = pd.DataFrame(X).fillna(0).sort_index(axis=1)  # 按列排序
# X.insert(0, 'Intercept', 1)
X.columns = X.columns.astype(str)
# 转换目标变量为 NumPy 数组
y = np.array(y)

# 将目标变量从 1 和 -1 转换为 1 和 0
y = np.where(y == -1, 0, y)

# 输出整理好的 X 和 y
print("Feature matrix X (first 5 rows):")
print(X.head())
print("\nTarget variable y (first 5 values):")
print(y[:5])

# 如果需要将 X 和 y 写入文件，可以使用以下代码：
# df_X.to_csv("X.csv", index=False)
# np.savetxt("y.csv", y, delimiter=",")


Feature matrix X (first 5 rows):
     1         2         3    4         5     6         7    8    9        10  \
0  1.0 -0.749474 -0.181429  0.0 -0.538462 -0.25 -0.888772 -1.0 -1.0 -1.000000   
1 -1.0 -0.731729 -0.500000  0.0  0.076923 -0.25 -0.988421 -1.0 -1.0 -1.000000   
2 -1.0 -0.523910 -0.875000 -1.0 -0.538462 -0.25 -0.912281 -1.0 -1.0 -1.000000   
3 -1.0 -0.761805 -0.178571 -1.0 -0.384615 -0.50 -1.000000  1.0  1.0 -0.671642   
4  1.0 -0.806917 -0.416429  0.0 -0.230769 -0.25 -0.862456  1.0  1.0 -0.582090   

    11   12    13       14  
0  1.0  0.0 -0.90 -0.97576  
1 -1.0  0.0 -0.84 -1.00000  
2  1.0  0.0 -0.72 -1.00000  
3  1.0  0.0 -1.00 -1.00000  
4 -1.0  0.0 -0.94 -0.99684  

Target variable y (first 5 values):
[0 0 0 1 1]


$\forall l\in \{1,\ldots,s\}$

$\Delta \beta_j^{k+l} = \frac{y^T X e_j^{k+l} - e_j^{k+l} X^T \text{sig}(X\beta^k) - \sum_{i=1}^{l-1} e_j^{k+l} X^T \text{sig}(X\beta^k) \odot (1 - \text{sig}(X\beta^k)) \odot X e_j^{k+i} \Delta \beta_j^{k+i}}
{e_j^{k+l} X^T \text{sig}(X\beta^k) \odot (1 - \text{sig}(X\beta^k)) \odot X e_j^{k+l}},$

where $sig(x)$ is a sigmoid function.

$\textbf{Algorithm:}$ \\
选定一次交流是s.


sample s features from p features, each correspond to a basic base($e_{j}^{k+l}$ 表示第$l$个抽取的feature位置为1其余为0的基).

$
1_s = (e_j^k, \ldots, e_j^{k+s}) \in \mathbb{R}^{p \times (s+1)} \\
X 1_s = (X e_j^k, \ldots, X e_j^{k+s}) \in \mathbb{R}^{n \times (s+1)} \\
\text{sig}(X\beta^k) \in \mathbb{R}^{n \times 1}$

$
W = \text{sig}(X\beta^k) \odot (1 - \text{sig}(X\beta^k)) \in \mathbb{R}^{n \times 1} \\
$
$
A = (X 1_s)^T \text{sig}(X\beta^k)_{(s+1) \times 1} \quad \Rightarrow \quad A_{l} = e_j^{k+l} X^T \text{sig}(X\beta^k)
$
$
B = (X 1_s)^T (W \odot (X 1_s)) \in \mathbb{R}^{(s+1) \times (s+1)} \quad
$

具体来说$B$中, $W \odot (X 1_s)$需要把$W$重复(广播)为$(W,W,\dots,W)_{n\times s}$

$\Rightarrow B_{l,i} = e_j^{k+l} X^T \text{sig}(X\beta^k) \odot (1 - \text{sig}(X\beta^k)) \odot X e_j^{k+i}
$

for $l$ in $s$:

$\quad
\Delta \beta_j^{k+l} = \frac{y^T X e_j^{k+l} - A_{l} - \sum_{i=0}^{l-1} B_{l,i} \Delta \beta_j^{k+i}}{B_{l,l}}
$

$\beta_j^{k+s} = \beta_{j}^{k}+\sum_{i=0}^{s-1}\Delta \beta_j^{k+i}$

In [6]:
import numpy as np
from sklearn.datasets import make_classification
import pandas as pd
from scipy.special import expit  # sigmoid
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import time
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def logistic_gradient_descent(X, y, beta, 、
                              beta_0, s, max_iter=2000, tol= 1e-4, lambda_para=0.2, alpha_para = 0.5):
    """
    Logistic Gradient Descent Algorithm with s-step updates.

    Parameters:
    X : np.ndarray
        Feature matrix (n x p).
    y : np.ndarray
        Target vector (n x 1).
    beta : np.ndarray
        Initial coefficients (p x 1).
    s : int
        Number of features to sample and update at each step.
    max_iter : int
        Maximum number of iterations.
    tol : float
        Tolerance for convergence.

    Returns:
    beta : np.ndarray
        Optimized coefficients.
    """
    # n = 100
    # p = 10
    # s = 5
    n, p = X.shape
    epsilon = 1e-8
    iter = 0
    # 每一轮其实是循环大致p/s次
    for iteration in range(max_iter*round(p/s + 1)):
        # # Sample s features from p features
        # sampled_indices = np.random.choice(p, s, replace=True)
        start = (iteration * s) % p
        end = start + s
        # print(end)
        flag = 0

        # 根据范围选择特征
        if end < p:
            flag = 0
            sampled_indices = np.arange(start, end)  # 在范围内直接取
        elif end == p:
            flag = 1
            iter += 1
            print(iter)
            sampled_indices = np.arange(start, end)  # 在范围内直接取
        else:
            # 超出范围时取余（循环选择）
            flag = 1
            iter += 1
            sampled_indices = np.concatenate((np.arange(start, p), np.arange(0, end % p)))
        # print(sampled_indices)
        # Construct 1_s and X_{1_s}
        one_s = np.eye(p)[:, sampled_indices]  # (p x s)
        #print(one_s)

        X_1s = X @ one_s  # (n x s)
        # print(X_1s)

        #if iteration == 0:
        #   beta_old = 0  # (n x 1)
        # 初始化beta_old
        if iteration == 0:
          beta_old = beta.copy()

        if flag == 1:
        # 说明已经循环完一轮了
          # print(flag)
          print(np.linalg.norm(beta_old - beta))
          if np.linalg.norm(beta_old - beta) < tol*np.sqrt(p):
            print(f"ending iteration:{iter}")
            # break
          # update beta_old
          beta_old = beta.copy()

        # Compute sigmoid
        sigmoid_X_beta = expit(beta_0 + X @ beta)  # (n x 1)
        # print("beta_0:", beta_0)
        # print("sigmoid_X_beta",sigmoid_X_beta)
        # # Check convergence
        # if np.linalg.norm(beta - beta_old) < tol and iteration > 2:
        #     print(f"Converged in {iteration + 1} iterations.")
        #     break


        # Compute W
        W = sigmoid_X_beta * (1 - sigmoid_X_beta)  # (n x 1)
        # print(W.shape)
        # print("W",W)
        delta_beta_0 = (np.sum(y)-np.sum(sigmoid_X_beta))/(np.sum(W)+epsilon)

        # if np.abs(delta_beta_0)<= np.abs(beta_0):

        beta_0 += delta_beta_0

        # Compute A
        A = (X_1s.T @ sigmoid_X_beta)  # (s x 1)
        #print(A.shape)

        # Compute B
        B = (X_1s.T @ (np.tile(W, (1, s)) * X_1s))  # (s x s)
        #print(B.shape)

        # Update beta for each feature in s
        delta_beta = np.zeros((p, 1))
        delta_beta_elastic = np.zeros((p, 1))
        for l in range(s):
            B_l_l = B[l, l]
            A_l = A[l]

            summation_term = 0
            for i in range(l):
                B_l_i = B[l, i]
                summation_term += B_l_i * delta_beta[sampled_indices[i]]
            #print(summation_term)


            # 弹性网
            delta_beta_l = ((1/n)*(y.T @ X_1s[:, l] - A_l - summation_term)-lambda_para*(1-alpha_para)*beta[sampled_indices[l]]) / ((1/n)*B_l_l + lambda_para*(1-alpha_para)+epsilon)


            # delta_beta_l = ((1/n)*(y.T @ X_1s[:, l] - A_l - summation_term)-lambda_para*beta[sampled_indices[l]]) / ((1/n)*B_l_l + lambda_para)

            #print(f"delta_beta_l:{delta_beta_l}")
            # delta_beta[sampled_indices[l]] = delta_beta_l
            # beta[sampled_indices[l]] += delta_beta_l

            # 弹性网
            # print(delta_beta_l)
            # print(lambda_para*alpha_para/ ((1/n)*B_l_l + lambda_para*(1-alpha_para)))
            # print("*"*80)
            l1_term = lambda_para*alpha_para/ ((1/n)*B_l_l + lambda_para*(1-alpha_para)+epsilon)
            # if delta_beta_l < 0:
            #     print(f"less than 0{delta_beta_l}")
            #     print(f"{beta[sampled_indices[l]]}")
            #     print(f"l1_term:{l1_term}")
            delta_beta_elastic_l = 0

            if np.abs(beta[sampled_indices[l]] + delta_beta_l) <= l1_term:
                delta_beta_elastic_l = beta[sampled_indices[l]]
                beta[sampled_indices[l]] = 0
                # print(f"type 1:{beta[sampled_indices[l]]}")
            elif beta[sampled_indices[l]] + delta_beta_l < 0:
                delta_beta_elastic_l = delta_beta_l + l1_term
                beta[sampled_indices[l]] += delta_beta_elastic_l

                # print(f"type 2:{beta[sampled_indices[l]]}")
            else:
                delta_beta_elastic_l = delta_beta_l - l1_term
                beta[sampled_indices[l]] += delta_beta_elastic_l
               # print(f"type 3:{beta[sampled_indices[l]]}")
            # print(delta_beta_elastic_l )
            delta_beta_elastic[sampled_indices[l]] = delta_beta_elastic_l


        #print(beta)
        # # Check convergence
        # if np.linalg.norm(delta_beta_elastic) < tol:
        #     print(f"Converged in {iteration + 1} iterations.")
        #     print(np.count_nonzero(delta_beta_elastic))

        #     break

    return beta, beta_0

# Example usage
np.random.seed(42)
#n, p = 100, 10
# Load the breast cancer dataset
# data = load_breast_cancer()
# X = data.data
# y = data.target
##################################################################################
# 乳腺癌
##################################################################################
# from sklearn.datasets import load_breast_cancer
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegressionCV

# # 加载数据
# data = load_breast_cancer()
# X, y = data.data, data.target

##################################################################################
# 随机生成
##################################################################################
# # Generate a synthetic dataset for logistic regression
# X, y = make_classification(
#     n_samples=500,         # 数据量适中
#     n_features = 2000,        # 特征很多
#     n_informative=40,      # 有用特征的数量
#     n_redundant=10,        # 冗余特征数量
#     n_classes=2,            # 二分类问题
#     flip_y=0.01,            # 少量标签噪声
#     random_state=42         # 保证可复现
# )

# Split the dataset into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 假设 X 是特征数据，y 是目标变量
# 确保 X 和 y 的顺序一致
n_samples = len(X)
split_index = int(n_samples * 0.8)  # 按 80% 划分

# 按顺序切分数据
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# # Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# X_train = X_train.values
# X_test = X_test.values
n, p = X.shape
beta_init = np.zeros((p, 1))
beta_0_init = np.log(np.mean(y_train) / (1 - np.mean(y_train)))
s = 64

# Measure time for coordinate gradient descent
start_time_cg = time.time()
beta, beta_0 = logistic_gradient_descent(X_train, y_train, beta_init,beta_0_init, s)
y_pred_cg = expit(beta_0 + X_test @ beta) >= 0.5
accuracy_cg = accuracy_score(y_test, y_pred_cg)
cg_time = time.time() - start_time_cg
print(f"Coordinate Gradient Descent beta:{beta_0},{beta}")
print(f"Coordinate Gradient Descent Accuracy: {accuracy_cg:.4f}")
print(f"Coordinate Gradient Descent Time: {cg_time:.4f} seconds")


# # Measure time for coordinate gradient descent
# start_time_cg = time.time()
# beta, beta_0 = logistic_gradient_descent(X_train, y_train, beta_init,beta_0_init, s=1)
# y_pred_cg = expit(beta_0 + X_test @ beta) >= 0.5
# accuracy_cg = accuracy_score(y_test, y_pred_cg)
# cg_time = time.time() - start_time_cg
# #print(f"Coordinate Gradient Descent bera:{beta}")
# print(f"Coordinate Gradient Descent Accuracy: {accuracy_cg:.4f}")
# print(f"Coordinate Gradient Descent Time: {cg_time:.4f} seconds")
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
import time

# 定义超参数
alpha = 0.2
l1_ratio = 0.5

# 开始计时
start_time_en = time.time()

# 创建 ElasticNet 模型
elastic_net_model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=2000, random_state=42)

# 拟合模型
elastic_net_model.fit(X_train, y_train)

# 进行预测
y_pred_en = elastic_net_model.predict(X_test) >= 0.5

# 计算准确率
accuracy_en = accuracy_score(y_test, y_pred_en)
print(f"ElasticNet Accuracy: {accuracy_en:.4f}")

# 计算性能指标
mse = mean_squared_error(y_test, y_pred_en)
r2 = r2_score(y_test, y_pred_en)

# 计算时间
en_time = time.time() - start_time_en

# 打印结果
accuracy_cgd = accuracy_score(y_test, y_pred_en)

print(f"Coordinate Gradient Descent Accuracy: {accuracy_cgd:.4f}")
print(f"ElasticNet Mean Squared Error: {mse:.4f}")
print(f"ElasticNet R2 Score: {r2:.4f}")
print(f"ElasticNet Time: {en_time:.4f} seconds")

# Measure time for Logistic Regression (default solver)
start_time_lr = time.time()
logistic_model = LogisticRegression(max_iter=2000, random_state=42)
logistic_model.fit(X_train, y_train)
y_pred_lr = logistic_model.predict(X_test)
accuracy_lr = accuracy_score(y_test, y_pred_lr)
lr_time = time.time() - start_time_lr
print(f"Logistic Regression Accuracy: {accuracy_lr:.4f}")
print(f"Logistic Regression Time: {lr_time:.4f} seconds")

# Measure time for Logistic Regression with liblinear solver
start_time_lr_liblinear = time.time()
# # # L1 正则化 (lambda=0.1)
# logistic_model_liblinear = LogisticRegression(
#     solver='liblinear',
#     penalty='l1',  # 或 'l2'，根据需要选择
#     C=2,          # 设置 lambda 的倒数
#     max_iter=2000,
#     random_state=42
# )
# logistic_model_liblinear = LogisticRegression(
#     solver='saga',
#     penalty='elasticnet',  # 指定 Elastic Net 正则化
#     l1_ratio=0.5,          # 设置 alpha 参数
#     C=5,                  # 设置 lambda 的倒数
#     max_iter=2000,
#     random_state=42
# )
logistic_model_liblinear = LogisticRegression(solver='liblinear', max_iter=2000, random_state=42)
logistic_model_liblinear.fit(X_train, y_train)
y_pred_lr_liblinear = logistic_model_liblinear.predict(X_test)
accuracy_lr_liblinear = accuracy_score(y_test, y_pred_lr_liblinear)
lr_liblinear_time = time.time() - start_time_lr_liblinear
print(f"Logistic Regression (liblinear) Accuracy: {accuracy_lr_liblinear:.4f}")
print(f"Logistic Regression (liblinear) Time: {lr_liblinear_time:.4f} seconds")


1.5982320610985092
1.87479644425769
1.8932635940251832
4
2.5904435274071305
2.3277611416824726
1.4538540373261934
0.8486619171886304
8
0.45792592246427494
0.26269099184108863
0.16873962003440846
0.1095039475349596
12
0.09003201615548473
0.08032515622169283
0.07839724564488565
0.057272578229238195
16
0.04864966026167656
0.04006943129570274
0.03421435803146269
0.029157312431489262
20
0.02386706857041187
0.019781078805892714
0.019430102394280974
0.01754788882534416
24
0.014870981146735046
0.012020904588160337
0.009554197470023243
0.008961589927570587
28
0.007441769875336958
0.006129940476807647
0.00644757945749193
0.005348184472694368
32
0.004473678731723086
0.0037368269383435335
ending iteration:33
0.0029482071070836896
ending iteration:34
0.002685228337825985
ending iteration:35
36
0.0022430070488211515
ending iteration:36
0.0017792586275343118
ending iteration:37
0.0019388729503326105
ending iteration:38
0.0016071571227062175
ending iteration:39
40
0.0013133530235975043
ending iteratio

KeyboardInterrupt: 

In [261]:
np.log(np.mean(y_train) / (1 - np.mean(y_train)))

-0.7239188392266992

In [210]:
np.random.choice(3, 4, replace=True)

array([0, 2, 1, 1])

In [246]:
# 将其写入 txt 文件
file_path = "/content/drive/My Drive/Colab Notebooks/Logistic_regression_coordinate/beta_values.txt"  # 保存的文件名

with open(file_path, "w") as f:
    f.write(f"beta_0: {beta_0}\n")
    f.write("beta: " + ", ".join(map(str, beta)) + "\n")

print(f"Values saved to {file_path}")

Values saved to /content/drive/My Drive/Colab Notebooks/Logistic_regression_coordinate/beta_values.txt


In [243]:
beta[1770]

array([-0.02778217])

In [3]:
import pandas as pd
import numpy as np

# 定义文件路径
file_path = "/content/drive/My Drive/Colab Notebooks/Logistic_regression_coordinate/colon-cancer.txt"

# 初始化存储
X = []
y = []

# 解析文件内容
with open(file_path, 'r') as f:
    for line in f:
        # 分割每一行数据
        parts = line.strip().split()
        # 提取目标变量 y（第一列），将浮点数转换为整数
        y.append(int(float(parts[0])))
        # 提取特征并存入字典，键为特征索引，值为特征值
        features = {int(kv.split(':')[0]): float(kv.split(':')[1]) for kv in parts[1:]}
        X.append(features)

# 转换为 Pandas DataFrame，填充缺失值为 0
X = pd.DataFrame(X).fillna(0).sort_index(axis=1)  # 按列排序

# 将列名转换为字符串（保持一致性）
X.columns = X.columns.astype(str)

# 转换目标变量为 NumPy 数组
y = np.array(y)

# 将目标变量从 -1 和 1 转换为 0 和 1
y = np.where(y == -1, 0, y)

# 检查结果
print("Feature matrix X (first 5 rows):")
print(X.head())
print("\nTarget variable y (first 5 values):")
print(y[:5])

# 保存特征矩阵 X 和目标变量 y 到文件（如果需要）
# X.to_csv("X.csv", index=False)
# np.savetxt("y.csv", y, delimiter=",", fmt="%d")

# 完成
print("\nData processing complete.")


Feature matrix X (first 5 rows):
          1         2         3         4         5         6         7  \
0  2.080750  1.099070  0.927763  1.029080 -0.130763  1.265460 -0.436286   
1  1.109460  0.786453  0.445560 -0.146323 -0.996316  0.555759  0.290734   
2 -0.676530  1.693100  1.559250  1.559980 -0.982179 -1.358510 -1.313990   
3  0.534396  1.677540  1.489030  0.778605 -0.183776 -1.116850 -1.487560   
4 -1.018900  0.511080  0.755641  1.013820  0.529899  0.160440 -0.087055   

          8         9        10  ...      1991      1992      1993      1994  \
0  0.728881  2.107980  1.359870  ... -0.825403 -0.138451  0.382957  0.876697   
1 -0.145259  1.132660  0.559093  ... -1.056290 -0.205499 -1.815370  0.324373   
2 -0.455067  0.295214  0.290694  ...  1.242970  1.230160 -2.039000  2.366090   
3 -0.579511  0.292683  1.345480  ...  0.559852 -0.593149 -4.440580  1.720710   
4  1.295290  0.458736  0.714082  ...  0.227110  0.497628 -0.083921 -0.382733   

       1995      1996      1997    

In [297]:
beta_init = np.zeros((p, 1))
beta_0_init = np.log(np.mean(y_train) / (1 - np.mean(y_train)))
# Function to calculate average time
def calculate_average_time(s, n_runs=30):
    times = []
    accuracies = []
    for _ in range(n_runs):
        n, p = X.shape
        beta_init = np.zeros((p, 1))
        beta_0_init = np.log(np.mean(y_train) / (1 - np.mean(y_train)))
        start_time_cg = time.time()
        beta, beta_0 = logistic_gradient_descent(X_train, y_train, beta_init, beta_0_init, s)
        y_pred_cg = expit(beta_0 + X_test @ beta) >= 0.5
        accuracy_cg = accuracy_score(y_test, y_pred_cg)
        cg_time = time.time() - start_time_cg
        times.append(cg_time)
        accuracies.append(accuracy_cg)
    avg_time = np.mean(times)
    avg_accuracy = np.mean(accuracies)
    return avg_time, avg_accuracy

# Running for s=1 and s=6
s_values = [16, 32, 64]
results = {}
for s in s_values:
    avg_time, avg_accuracy = calculate_average_time(s)
    results[s] = {"average_time": avg_time}
print(results)

1
0.9249638065695058
2
0.7925341585537824
3
0.3959018324458983
4
0.22520167865025506
5
0.20116028173727987
6
0.15775455344477649
7
0.10279674836266864
8
0.07740388403096886
9
0.06162303001975257
10
0.0492742177439111
11
0.04036891106179438
12
0.03295739309312455
13
0.02807344869076468
14
0.023851962171635863
15
0.020168454988415734
16
0.017681324166847822
17
0.015335781932343756
18
0.013131778917902208
19
0.011162039462061686
20
0.009514744962724244
21
0.008153897852272963
22
0.006931843468772974
23
0.00616570304355728
24
0.005142837162228204
25
0.0043627468656668176
ending iteration:25
1
0.9249638065695058
2
0.7925341585537824
3
0.3959018324458983
4
0.22520167865025506
5
0.20116028173727987
6
0.15775455344477649
7
0.10279674836266864
8
0.07740388403096886
9
0.06162303001975257
10
0.0492742177439111
11
0.04036891106179438
12
0.03295739309312455
13
0.02807344869076468
14
0.023851962171635863
15
0.020168454988415734
16
0.017681324166847822
17
0.015335781932343756
18
0.013131778917902208


# Gisette data

In [20]:
import pandas as pd
import numpy as np

# 读取数据文件
file_path = "/content/drive/My Drive/Colab Notebooks/Logistic_regression_coordinate/gisette_scale.txt"

# 初始化存储
X = []
y = []

# 解析文件内容
with open(file_path, 'r') as f:
    for line in f:
        parts = line.strip().split()
        y.append(int(parts[0]))  # 提取目标变量 y
        features = {int(kv.split(':')[0]): float(kv.split(':')[1]) for kv in parts[1:]}  # 提取特征
        X.append(features)

# 创建 Pandas DataFrame 并填充缺失值为 0
X = pd.DataFrame(X).fillna(0).sort_index(axis=1)  # 按列排序
X.insert(0, 'Intercept', 1)
X.columns = X.columns.astype(str)
# 转换目标变量为 NumPy 数组
y = np.array(y)

# 将目标变量从 1 和 -1 转换为 1 和 0
y = np.where(y == -1, 0, y)

# 输出整理好的 X 和 y
print("Feature matrix X (first 5 rows):")
print(X.head())
print("\nTarget variable y (first 5 values):")
print(y[:5])

# 如果需要将 X 和 y 写入文件，可以使用以下代码：
# df_X.to_csv("X.csv", index=False)
# np.savetxt("y.csv", y, delimiter=",")


Feature matrix X (first 5 rows):
   Intercept         1    2         3    4    5    6    7         8    9  ...  \
0          1  0.101101 -1.0 -0.009009 -1.0 -1.0 -1.0 -1.0  0.953954 -1.0  ...   
1          1 -1.000000 -1.0 -1.000000 -1.0 -1.0 -1.0 -1.0  0.953954 -1.0  ...   
2          1 -1.000000 -1.0 -1.000000 -1.0 -1.0 -1.0 -1.0 -1.000000 -1.0  ...   
3          1 -1.000000 -1.0  0.485485 -1.0 -1.0 -1.0 -1.0  0.369369 -1.0  ...   
4          1 -1.000000 -1.0 -1.000000 -1.0 -1.0 -1.0 -1.0  0.217217 -1.0  ...   

       4991      4992  4993      4994      4995  4996      4997  4998  4999  \
0 -1.000000 -1.000000  -1.0  0.983984  0.983984  -1.0 -1.000000  -1.0  -1.0   
1 -1.000000 -0.049049  -1.0  0.983984 -1.000000  -1.0  0.983984  -1.0  -1.0   
2 -1.000000 -1.000000  -1.0 -1.000000 -1.000000  -1.0 -1.000000  -1.0  -1.0   
3 -1.000000 -1.000000  -1.0 -1.000000 -1.000000  -1.0  0.349349  -1.0  -1.0   
4  0.983984 -1.000000  -1.0  0.657658 -1.000000  -1.0 -1.000000  -1.0  -1.0   

     

In [24]:
X

Unnamed: 0,Intercept,1,2,3,4,5,6,7,8,9,...,4991,4992,4993,4994,4995,4996,4997,4998,4999,5000
0,1,0.101101,-1.0,-0.009009,-1.0,-1.0,-1.000000,-1.0,0.953954,-1.0,...,-1.000000,-1.000000,-1.0,0.983984,0.983984,-1.0,-1.000000,-1.0,-1.000000,0.967968
1,1,-1.000000,-1.0,-1.000000,-1.0,-1.0,-1.000000,-1.0,0.953954,-1.0,...,-1.000000,-0.049049,-1.0,0.983984,-1.000000,-1.0,0.983984,-1.0,-1.000000,-1.000000
2,1,-1.000000,-1.0,-1.000000,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,...,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000
3,1,-1.000000,-1.0,0.485485,-1.0,-1.0,-1.000000,-1.0,0.369369,-1.0,...,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0,0.349349,-1.0,-1.000000,0.677678
4,1,-1.000000,-1.0,-1.000000,-1.0,-1.0,-1.000000,-1.0,0.217217,-1.0,...,0.983984,-1.000000,-1.0,0.657658,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,1,-1.000000,-1.0,-1.000000,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,...,0.983984,-1.000000,-1.0,-1.000000,-1.000000,-1.0,0.567568,-1.0,-1.000000,-1.000000
5996,1,-1.000000,-1.0,-1.000000,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,...,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0,0.843844,-1.0,0.773774,-1.000000
5997,1,-1.000000,-1.0,-1.000000,-1.0,-1.0,0.517518,-1.0,-1.000000,-1.0,...,-1.000000,0.803804,-1.0,-1.000000,-1.000000,-1.0,0.961962,-1.0,-1.000000,-1.000000
5998,1,-1.000000,-1.0,-1.000000,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,...,-1.000000,-1.000000,-1.0,-1.000000,0.381381,-1.0,-1.000000,-1.0,-1.000000,-1.000000


In [224]:
y_pred_lr

array([ 1, -1,  1,  1, -1, -1,  1, -1, -1, -1,  1, -1,  1, -1, -1,  1,  1,
        1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1,
       -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1,  1,  1,
       -1, -1,  1, -1, -1, -1,  1, -1, -1, -1,  1,  1, -1, -1, -1,  1,  1,
       -1, -1,  1, -1, -1, -1, -1,  1,  1, -1,  1,  1,  1, -1, -1, -1, -1,
        1, -1, -1,  1,  1,  1, -1, -1, -1, -1,  1,  1,  1,  1, -1,  1,  1,
        1, -1,  1, -1,  1,  1,  1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        1,  1,  1,  1, -1, -1, -1, -1, -1, -1, -1,  1,  1,  1, -1, -1, -1,
        1,  1])

# Original

In [25]:
!pip install pydicom


Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/2.4 MB[0m [31m11.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━[0m [32m2.1/2.4 MB[0m [31m29.4 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.4/2.4 MB[0m [31m28.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-3.0.1


In [26]:
import pydicom

# 加载 DICOM 文件
dicom_file = pydicom.dcmread("path/to/dicom/file.dcm")
print(dicom_file)


FileNotFoundError: [Errno 2] No such file or directory: 'path/to/dicom/file.dcm'