In [1]:
import inspect
import setred_package

In [3]:
import numpy as np
from sklearn.utils import check_array
from sklearn.datasets import load_iris

# Load the iris feature matrix and its integer class labels
X, y = load_iris(return_X_y=True)

# Hide roughly 30% of the labels by overwriting them with the marker -1
rng = np.random.RandomState(42)
mask_unlabeled = rng.rand(len(y)) < 0.3
y[mask_unlabeled] = -1

# Validate both arrays; y stays 1-D and keeps its original dtype
X = check_array(X)
y = check_array(y, ensure_2d=False, dtype=y.dtype.type)

# Build the "is unlabeled" mask once and reuse it for every split
unlabeled_marker = y.dtype.type(-1)
is_unlabeled = y == unlabeled_marker
X_label, y_label = X[~is_unlabeled], y[~is_unlabeled]
X_unlabel = X[is_unlabeled]

# Report the size of each partition
print(f"Total samples: {len(y)}")
print(f"Labeled samples: {len(y_label)}")
print(f"Unlabeled samples: {len(X_unlabel)}")

Total samples: 150
Labeled samples: 99
Unlabeled samples: 51


In [None]:
import numpy as np
from sklearn.utils import check_X_y

# Plain nested lists are passed in; check_X_y validates the pair and the
# returned objects expose .shape (printed below).
X = [[1, 2], [3, 4], [5, 6]]
y = [0, 1, 0]

X_checked, y_checked = check_X_y(X, y)

print(X_checked.shape)  # (3, 2)
print(y_checked.shape)  # (3,)

In [None]:
import pandas as pd
# Wrap the validated array in a DataFrame with named feature columns
X_df = pd.DataFrame(X_checked, columns=['feature1', 'feature2'])
# Cell output: confirms that X_df is a pandas DataFrame
isinstance(X_df, pd.DataFrame)

In [None]:
class Calculator:
    """Hold two operands and expose their sum and their difference."""

    def __init__(self, a, b):
        """Store the operands ``a`` and ``b`` on the instance."""
        self.a, self.b = a, b

    def add(self):
        """Return ``a + b`` for the stored operands."""
        first, second = self.a, self.b
        return first + second

    def subtract(self):
        """Return ``a - b`` (the second operand is taken away from the first)."""
        first, second = self.a, self.b
        return first - second

# Example usage
calc = Calculator(10, 5)

print(calc.add())      # Output: 15
print(calc.subtract())  # Output: 5

In [None]:
def calculate_prior_probability(y):
    """Compute the prior probability of every label found in ``y``.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Labels whose relative frequencies are computed.

    Returns
    -------
    class_probability : dict
        Maps each distinct label (key) to its relative frequency (value),
        i.e. its number of occurrences divided by ``len(y)``, as a float.
    """
    labels, occurrences = np.unique(y, return_counts=True)
    total = len(y)
    return {
        label: float(count / total)
        for label, count in zip(labels, occurrences)
    }


In [None]:
# Draw 150 random "training" labels and 30 random "query" labels from {0, 1, 2}.
# A seeded generator is used so the cells that depend on y_probabilities and y_
# give the same numbers after a kernel restart.
label_rng = np.random.RandomState(42)
y = label_rng.choice([0, 1, 2], size=150)
y_probabilities = calculate_prior_probability(y)
y_ = label_rng.choice([0, 1, 2], size=30)


In [None]:
# Permutation that orders the label keys ascending; it is passed as `sorter`
# to np.searchsorted in a later cell. The keys originate from np.unique inside
# calculate_prior_probability, so they are already sorted here.
sort_idx = np.argsort(list(y_probabilities.keys()))
sort_idx

In [None]:
y_.shape

In [None]:
# For every label in y_, look up its insertion position among the label keys
# (searched in the order given by sort_idx).
idx = np.searchsorted(np.array(list(y_probabilities.keys())), y_, sorter = sort_idx )
idx

In [None]:
# Probability that each y_ entry is wrong: 1 minus the prior of its label
p_wrong = 1 - np.array(list(y_probabilities.values()))[sort_idx][idx]
p_wrong

In [None]:
# Length of p_wrong repeated once per `weights` column, divided by len(y_).
# NOTE(review): within the visible notebook `weights` is first assigned in a
# later cell (weights = A.toarray()), so on a fresh Restart & Run All this
# cell appears to raise NameError — confirm the intended cell order.
np.repeat(p_wrong, weights.shape[1]).shape[0]/y_.shape[0]

In [None]:
import numpy as np
# Draw one Bernoulli variable per entry of `weights`; row i uses probability p_wrong[i].
# NOTE(review): relies on `weights` from a later cell, and the reshape only
# works when len(p_wrong) == weights.shape[0] — confirm the cell order.
random_state = np.random.RandomState(42)
iid_random = random_state.binomial(
                1, np.repeat(p_wrong, weights.shape[1]).reshape(weights.shape)
            )
# Cell output: first row of the probability matrix (p_wrong[0] repeated per column)
np.repeat(p_wrong, weights.shape[1]).reshape(weights.shape)[0,:]


In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

# Load data
X, y = load_iris(return_X_y=True)

# Train a base classifier
clf = LogisticRegression(max_iter=200)
clf.fit(X, y)

# Let's simulate U_ as 4 new samples from the dataset (rows 50 to 53)
U_ = X[50:54]  # Shape: (4, 4)

# Apply your code manually
raw_predictions = clf.predict_proba(U_)
predictions = np.max(raw_predictions, axis=1)       # Highest class probability for each sample
class_predicted = np.argmax(raw_predictions, axis=1)  # Column index of that highest probability

In [None]:
raw_predictions

In [None]:
np.max(raw_predictions, axis = 1 )

In [None]:
np.argmax(raw_predictions, axis = 1)

In [None]:
# Stand-in for a probability matrix: 100 rows, 4 columns of uniform random values
random_variables = np.random.rand(100, 4)
predictions = np.max(random_variables, axis=1)
class_predicted = np.argmax(random_variables, axis=1)
indexes = predictions.argsort()[-10:]  # Indices of the 10 largest predictions (ascending order)

In [None]:
# Worked example: turning predicted class indices into class labels
classes_ = np.array(['A', 'B', 'C'])
class_predicted = np.array([2, 0, 1])  # Class indices
indexes = [0, 2]                       # Positions of interest: first and third

# class_predicted[indexes] -> [2, 1]
# classes_[[2, 1]]         -> ['C', 'B']
y_ = classes_[class_predicted[indexes]]


In [None]:
# NOTE: this bare expression is evaluated but not displayed — a cell only
# shows the value of its last expression.
raw_predictions.shape
# Build a single row with 3 columns and stack it underneath raw_predictions
new_instance = np.array([1,2,3])
new_instance = new_instance.reshape(1,3)
np.concatenate((raw_predictions,new_instance), axis=0)


In [None]:
from sklearn.datasets import load_iris
from sklearn.neighbors import kneighbors_graph
import networkx as nx
import matplotlib.pyplot as plt

# Load dataset
X, y = load_iris(return_X_y=True)

# Build kneighbors graph: each sample is connected to its 3 nearest
# neighbours (itself excluded); mode='distance' requests distances as the
# stored edge values rather than 0/1 connectivity.
A = kneighbors_graph(X, n_neighbors=3, mode='distance', include_self=False)


In [None]:
# Convert sparse matrix to a NetworkX graph
G = nx.from_scipy_sparse_array(A)


In [None]:
# Option 1: use a layout (e.g., spring layout)
pos = nx.spring_layout(G, seed=42)  # Optional: reproducible layout

# Option 2: use actual data for positions (e.g., first two features)
# pos = {i: X[i, :2] for i in range(X.shape[0])}

plt.figure(figsize=(8, 6))
nx.draw(G, pos, node_size=50, node_color=y, cmap=plt.cm.viridis, with_labels=False)
plt.title("k-Nearest Neighbors Graph (k=3)")
plt.show()


In [None]:
weights = A.toarray()

In [None]:
# Element-wise reciprocal of the weights. Entries where weights == 0 are
# skipped by `where=` and keep the 0 from the zero-filled `out` array, which
# avoids a division by zero.
Iweights = np.divide(1,weights, out=np.zeros_like(weights), where= weights!=0 )

In [None]:
# Add 1 to every non-zero entry of weights, in place.
# NOTE(review): not idempotent — re-running this cell adds 1 again, and
# Iweights (computed in the previous cell) does not reflect the shift.
weights[weights != 0] += 1

In [None]:
weights.shape

In [None]:
import numpy as np

# Use fixed seed for reproducibility
rng = np.random.RandomState(0)

# 10 coin tosses with 30% chance of heads (1).
# p is a scalar here: an array-valued p would have to broadcast against
# `size`, and a length-2 p cannot broadcast to size=10 (ValueError).
samples = rng.binomial(n=1, p=0.3, size=10)

print(samples)

In [None]:
import numpy as np
# NOTE(review): `pwrong` (no underscore) is a different name from the
# `p_wrong` used by the other cells, and it is drawn from the unseeded
# global generator — confirm which variable is intended.
pwrong = np.random.rand(5)
# Keep only the last 30 rows of weights; this overwrites the full matrix
weights = weights[-30:,:]
pwrong

In [None]:
weights.shape[1]

In [None]:
# Same lookup as the earlier searchsorted cell.
# NOTE(review): in between, y_ is reassigned to the string labels ['C', 'B']
# by the class-lookup example, while the keys of y_probabilities are the
# numeric labels — confirm which y_ this cell is meant to use.
idx = np.searchsorted(np.array(list(y_probabilities.keys())), y_, sorter = sort_idx )

In [None]:
import inspect
from sklearn.base import BaseEstimator
from sslearn.wrapper import Setred

print(inspect.getsource(Setred))


In [None]:
# Example: combined class labels
pre_yL = np.array([0, 1, 0, 2, 1])
print("Pre-labeled classes:", pre_yL)
# Class contrast matrix: C[i, j] is 1 when samples i and j carry different
# labels and 0 when they share the same label
C = np.not_equal.outer(pre_yL, pre_yL).astype(int)

In [None]:
pre_yL[:, None].shape

In [None]:
pre_yL.shape

In [None]:
import numpy as np
from scipy.stats import norm

def simulate_ji_matrix(p_wrong, weights, weights_sum, weights_square_sum, n_simulations, random_state):
    """Simulate the weighted test statistic ``ji`` under independent Bernoulli draws.

    For every simulation, one Bernoulli variable is drawn per entry of
    ``weights`` (row ``i`` uses success probability ``p_wrong[i]``); the
    statistic of row ``i`` is the sum of the weights whose draw came up 1.

    Parameters
    ----------
    p_wrong : array-like of shape (n_instances,)
        Per-instance Bernoulli probability.
    weights : ndarray of shape (n_instances, n_neighbors)
        Weights multiplied with the simulated 0/1 draws.
    weights_sum, weights_square_sum : any
        Not used by the simulation. They are kept only so that existing
        calls keep working; ``None`` is accepted.
    n_simulations : int
        Number of independent simulations (columns of the result).
    random_state : numpy.random.RandomState-like
        Object providing ``binomial(n, p)``; it is advanced once per simulation.

    Returns
    -------
    ji_matrix : ndarray of shape (n_instances, n_simulations)
        ``ji_matrix[i, s]`` is the simulated statistic of instance ``i`` in
        simulation ``s``.

    Notes
    -----
    No z-score / p-value filter is computed here: nothing of that kind is
    returned, so any filtering on the simulated statistics is left to the
    caller.
    """
    n_instances, n_neighbors = weights.shape

    # Row i of the probability matrix is p_wrong[i] repeated n_neighbors times
    p_matrix = np.repeat(p_wrong, n_neighbors).reshape(weights.shape)

    ji_matrix = np.zeros((n_instances, n_simulations))
    for s in range(n_simulations):
        # One Bernoulli draw per (instance, neighbour) pair
        iid_random = random_state.binomial(1, p_matrix)
        # Sum of the weights selected by the draws, per instance
        ji_matrix[:, s] = (iid_random * weights).sum(axis=1)

    return ji_matrix


def compare_to_observed(jiobs, ji_matrix):
    """Return, per instance, the share of simulations that do not exceed the observed value.

    Parameters
    ----------
    jiobs : ndarray of shape (n_instances,)
        Observed statistic of each instance.
    ji_matrix : array-like of shape (n_instances, n_simulations)
        Simulated statistics, one column per simulation.

    Returns
    -------
    ndarray of shape (n_instances,)
        ``1 - mean(simulated > observed)`` along the simulation axis, i.e.
        the proportion of simulations with ``simulated <= observed``.
    """
    observed_column = jiobs[:, None]
    simulated_exceeds = ji_matrix > observed_column
    exceed_fraction = np.mean(simulated_exceeds, axis=1)
    return 1 - exceed_fraction
    


In [None]:
import numpy as np
from scipy.stats import norm

# Step 1: Define your simulated setup
random_state = np.random.RandomState(42)
n_instances = 5
n_neighbors = 4
n_simulations = 1000

# Example weights between subjects (normally distances).
# Drawn from the seeded generator (not the global np.random) so that the
# whole setup is reproducible.
weights = random_state.rand(n_instances, n_neighbors)

# Probabilities of being wrong (say from prior distribution)
p_wrong = random_state.uniform(0.1, 0.4, size=n_instances)

# Precomputed stats used for normalization
weights_sum = weights.sum(axis=1)
weights_square_sum = (weights ** 2).sum(axis=1)

# Step 2: Create observed test statistic
# Simulate one Bernoulli draw as if observed: row i uses probability p_wrong[i]
p_matrix = np.repeat(p_wrong, n_neighbors).reshape(weights.shape)
iid_observed = random_state.binomial(1, p_matrix)
ji_obs = (iid_observed * weights).sum(axis=1)






In [None]:
# Run simulation and comparison
# NOTE: only 2 simulations are drawn here (n_simulations=2 is passed
# explicitly), not the n_simulations value configured in the setup cell, so
# ji_sim has 2 columns.
ji_sim = simulate_ji_matrix(p_wrong, weights, weights_sum, weights_square_sum,
                            n_simulations=2, random_state=random_state)

ji_sim

In [None]:
ji_obs[:, None]

In [None]:
(ji_obs[:, None] < ji_sim)

In [None]:
(ji_obs[:, None] < ji_sim).sum(axis=1)

In [None]:
# Per-instance share of simulations whose statistic exceeds the observed one.
# Uses the notebook-level names ji_obs / ji_sim; `jiobs` and `ji_matrix` exist
# only as parameter names inside the helper functions.
pvalue_simulated = np.mean(ji_obs[:, None] < ji_sim, axis=1)

In [None]:


# compare_to_observed returns 1 - mean(simulated > observed) per instance,
# i.e. a proportion (not a count) of simulations with simulated <= observed.
counts = compare_to_observed(ji_obs, ji_sim)

# Print results (rows of ji_sim are instances, columns are simulations)
print("Observed ji:", ji_obs)
print("Simulated ji (first 5 instances):\n", ji_sim[:5])
print("Proportion of simulations with simulated <= observed:", counts)

In [None]:
counts < 1