### Implementing Self-supervised Learning

**Tutorial 8.1. To implement a self-training classifier on the Iris dataset**

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.semi_supervised import SelfTrainingClassifier

# Load the Iris dataset (every sample comes with a ground-truth label)
X, y = load_iris(return_X_y=True)

# Split the data into a small labeled portion (20%) and a large "unlabeled"
# portion (80%). The true labels of the unlabeled portion are hidden from
# training and kept only so the predictions can be evaluated afterwards.
X_labeled, X_unlabeled, y_labeled, y_unlabeled_true = train_test_split(
    X, y, test_size=0.8, random_state=42
)

# SelfTrainingClassifier needs the labeled and unlabeled samples together in
# one training set, with the unlabeled samples marked by the label -1.
# Fitting on the labeled samples alone would reduce it to plain supervised
# learning, because there would be nothing to pseudo-label.
X_train = np.concatenate([X_labeled, X_unlabeled])
y_train = np.concatenate([y_labeled, np.full(len(X_unlabeled), -1)])

# Initialize a base classifier (e.g., logistic regression); self-training
# requires a base estimator that implements predict_proba.
base_classifier = LogisticRegression()

# Create a self-training classifier
self_training_clf = SelfTrainingClassifier(base_classifier)

# Fit the model using both labeled and unlabeled (-1) data
self_training_clf.fit(X_train, y_train)

# Predict on the unlabeled data
y_pred_unlabeled = self_training_clf.predict(X_unlabeled)






**Tutorial 8.2. To evaluate the self-training classifier on the Iris dataset**

Finally, evaluate the performance of your self-training classifier using appropriate metrics (e.g., accuracy, F1-score, precision, and recall). You can also compare it with the performance of the base classifier.


In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# y_unlabeled_true is expected to hold the ground-truth labels for X_unlabeled;
# it has to be defined before this cell runs.
accuracy = accuracy_score(y_unlabeled_true, y_pred_unlabeled)

# F1, precision and recall are computed per class and then combined with
# average='weighted', in that order.
f1, precision, recall = (
    metric(y_unlabeled_true, y_pred_unlabeled, average='weighted')
    for metric in (f1_score, precision_score, recall_score)
)

# Report every score rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1-score: {f1:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")


In [None]:

# Note: Calibration of the classifier is essential for better results. You can fine-tune hyperparameters or use techniques like Platt scaling or isotonic regression to improve calibration.

**Tutorial 8.2. To implement word embeddings with a self-supervised task using the Word2Vec method**

In [None]:
from gensim.models import Word2Vec
from nltk.corpus import brown

# Load a corpus (e.g., Brown corpus). If the corpus data is not installed
# locally, NLTK raises LookupError on first access; in that case fetch it
# once with the NLTK downloader and retry.
try:
    sentences = brown.sents()
except LookupError:
    import nltk

    nltk.download('brown')
    sentences = brown.sents()

# Train Word2Vec model: 100-dimensional vectors, a context window of 5 words,
# min_count=1 so that no word is dropped for being rare, and sg=1 to select
# the skip-gram training algorithm.
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, sg=1)

# Get word embeddings (lookups are case-sensitive; a word that is not in the
# trained vocabulary raises KeyError)
vector_king = model.wv['king']
vector_queen = model.wv['queen']

# Similarity between words
similarity = model.wv.similarity('king', 'queen')

print(f"Vector similarity between 'king' and 'queen': {similarity:.2f}")
