In [None]:
!pip install transformers torch

In [None]:
import pandas as pd
import re
# Load the raw corpus. The parsing below treats the file as records separated
# by '--', each record being '<label> *** <text>'.
with open('data.txt', 'r') as f:
    data = f.read()

# Split the data into samples
samples = data.split('--')

# Parallel lists: labels[i] belongs to texts[i]
labels = []
texts = []
skipped = 0  # non-blank fragments that carried no '***' separator

for sample in samples:
    if not sample.strip():
        # Blank or whitespace-only fragment (e.g. the newline that follows a
        # trailing '--'); nothing to parse.
        continue

    split_sample = sample.split('***')
    if len(split_sample) < 2:
        # Without a separator there is no text part. Indexing split_sample[1]
        # here would raise IndexError, so count the fragment and move on.
        skipped += 1
        continue

    # Label: trim surrounding whitespace, then drop interior spaces.
    labels.append(split_sample[0].strip().replace(' ', ''))

    # Text: remove punctuation, then collapse every whitespace run (newlines
    # included) into a single space.
    clean_text = re.sub(r'[^\w\s]', '', split_sample[1].strip())
    texts.append(re.sub(r'\s+', ' ', clean_text))

if skipped:
    print(f'Skipped {skipped} malformed sample(s) without a "***" separator')

# Create a DataFrame
df = pd.DataFrame({'label': labels, 'text': texts})
df.head()


In [None]:
from transformers import BertTokenizerFast, BertForSequenceClassification
from transformers import Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import torch

# Turn the string labels into integer class ids. The fitted encoder `le` is
# kept so that predicted ids can be mapped back to label names.
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(df['label'])
labels = le.transform(df['label'])



In [None]:
# Split texts and labels into train and validation sets.
# test_size=0 would leave nothing to validate on (and recent scikit-learn
# versions should reject it with a ValueError - confirm for the installed
# version), so hold out a real fraction. The seed makes the split repeatable
# across kernel restarts.
VAL_FRACTION = 0.2
SPLIT_SEED = 42
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'].tolist(),
    labels,
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
)

# Initialize the BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

In [None]:
# Tokenize both splits with one shared set of options so the training and
# validation encodings are produced the same way.
encode_options = {'truncation': True, 'padding': True, 'max_length': 128}
train_encodings = tokenizer(train_texts, **encode_options)
val_encodings = tokenizer(val_texts, **encode_options)

In [None]:
# Convert labels to integer tensors.
# torch.as_tensor returns an existing tensor unchanged, so re-running this cell
# does not trigger the copy-construct warning that torch.tensor(tensor) emits.
# The dtype is pinned to long so the class ids are explicit 64-bit integers.
train_labels = torch.as_tensor(train_labels, dtype=torch.long)
val_labels = torch.as_tensor(val_labels, dtype=torch.long)

In [None]:
# Prepare the datasets
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with optional labels.

    Parameters
    ----------
    encodings : mapping
        Maps each feature name to a per-sample sequence; every value is
        indexed with the sample index in ``__getitem__``.
    labels : indexable, optional
        One label per sample. When omitted, items carry no ``'labels'`` entry,
        so the same class can wrap unlabeled data for inference.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors (plus its label, if any)."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels is not None:
            item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        """Number of samples: the label count, or the 'input_ids' count when unlabeled."""
        if self.labels is not None:
            return len(self.labels)
        return len(self.encodings['input_ids'])


In [None]:
# Create the datasets using encodings and labels
train_dataset = Dataset(train_encodings, train_labels)
val_dataset = Dataset(val_encodings, val_labels)

In [None]:
# Load pretrained BERT with a classification head that has one output per
# distinct encoded label.
num_classes = len(set(labels))
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_classes)

In [None]:
# Set up the Trainer
NUM_EPOCHS = 3
TRAIN_BATCH_SIZE = 16

# Size the warm-up and the logging interval from the actual length of the run.
# A fixed warmup_steps=500 is far longer than the 15 optimizer steps this
# notebook's training run reports, so the learning rate would still be ramping
# up when training ends. The step estimate below assumes a single device and
# no gradient accumulation.
steps_per_epoch = max(1, (len(train_dataset) + TRAIN_BATCH_SIZE - 1) // TRAIN_BATCH_SIZE)
total_steps = steps_per_epoch * NUM_EPOCHS
warmup_steps = total_steps // 10  # warm up over roughly the first 10% of steps

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    per_device_eval_batch_size=64,
    warmup_steps=warmup_steps,
    weight_decay=0.01,
    logging_dir='./logs',
    # Log about once per epoch; with the default interval a run this short
    # never reports a training loss.
    logging_steps=steps_per_epoch,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

In [None]:
# Fine-tune the model; keep the returned summary and show it as the cell output.
train_result = trainer.train()
train_result



Step,Training Loss


TrainOutput(global_step=15, training_loss=4.646981811523437, metrics={'train_runtime': 281.1728, 'train_samples_per_second': 0.832, 'train_steps_per_second': 0.053, 'total_flos': 15405263797248.0, 'train_loss': 4.646981811523437, 'epoch': 3.0})

In [None]:
import torch
import numpy as np

# The validation split is already tokenized and wrapped in a Dataset, so the
# Trainer can score it directly. Element 0 of the result holds the raw model
# outputs as a NumPy array; wrap it as a tensor for the softmax below.
predictions = torch.from_numpy(trainer.predict(val_dataset)[0])

# Normalise the outputs into per-class probabilities
probabilities = torch.nn.functional.softmax(predictions, dim=-1)

# Most probable class per sample, back in NumPy for the comparison
class_predictions = probabilities.argmax(dim=-1).numpy()

# Reference labels as a NumPy array as well
val_labels_np = val_labels.numpy()

# Accuracy = number of matching predictions / number of validation samples
n_correct = (class_predictions == val_labels_np).sum()
accuracy = n_correct / len(val_labels_np)

print(f'Validation accuracy: {accuracy}')


Validation accuracy: 0.0


In [None]:
class Dataset(torch.utils.data.Dataset):
    """Dataset over tokenizer encodings whose labels are optional.

    NOTE: this redefines (and from here on shadows) the ``Dataset`` class
    declared earlier in the notebook; this variant also accepts unlabeled data.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # Sample count is taken from the encodings, since labels may be absent.
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        # One tensor per encoding field for the requested sample.
        item = {}
        for name, column in self.encodings.items():
            item[name] = torch.tensor(column[idx])
        if self.labels is None:
            return item
        item['labels'] = self.labels[idx]
        return item

# Prepare your new data
new_data = ["""APPEARANCE AND CONSTITUTION:

Children: cannot bear to be washed or bathed; emaciated, big-bellied; restless, hot, kick off the clothes at night; have worms, but the best selected remedy fails.
Skin harsh, wrinkled; child looks like an old man.
Atrophy of children.
Hair: They are apt to have very coarse, strong hair, and always a fairly high color. That is one of the exceptions to the coarse hair, because that type of  child very often has poorly developed eyelashes; they have had repeated attacks of blepharitis, they have crusty eruptions about the eyelids which they have picked and scratched, and consequently the eyelashes tend to be undeveloped or poor.
According to Dr.Borland, two types of  child is there. Much the commonest is a fairly well-nourished, well-grown child, always with a definitely big head. They are usually fairly heavy in build and rather awkward and clumsy in their movements. The other  type, which is usually thin, with a fairly big head but rather spindly legs, very often with a big abdomen, rather poorly developed chest, very often not so much color, tending to be paler, with a definitely rougher skin.
These thin  children have even less stamina than the fat ones, they are more easily exhausted and, like all  patients, they cannot stand for any length of time. They stand badly in the ordinary instance, and if they are kept standing they go to pieces.
Muscles of neck weak; child cannot hold head.
MENTAL GENERALS:

Standing is worst position, it is always uncomfortable.
Dirty, filthy people, prone to skin affections. Aversion to being washed.
Child cross and obstinate.
Child grasps everything within reach and thrusts it into its mouth.
 child likes to touch everything, as if only what they touch is real.
Child kicks the clothes off at night.
 children; badly handled they are dull, heavy, cross, irritable; and properly handled they can be bright, interesting, quite friendly, and very often clever. Some of the  children have a most astonishing command of languages.
Child jumps, starts and screams fearfully.
PHYSICAL GENERALS:

APPETITE:

Good appetite.
 children is that they have an almost perverted desire for out – of – the – ordinary food, the unusual dish that the average child dislikes, the  child will eat with relish.
Another constant feature in both children and adults is that they are always very sluggish after meals, they get heavy and sleepy, they want to lie about, and are irritable when disturbed.
Also they get a hungry period about 11 a.m.  children are liable to be seedy, headachy, and irritable and tired out if they have to wait for their meals.
One very useful pointer about  children is that they are liable to get digestive upsets from milk. The small  baby very often gets sickness, and may get diarrhoea and vomiting, from milk, and this marked milk aggravation is often overlooked.
Nutrition is affected on account of defective assimilation, in spite of voracious appetite, the patient emaciates (esp. children).
URINE:

Enuresis, especially in scrofulous, untidy children.
STOOL:

Constipation: stools hard, knotty, dry, as if burnt; large, painful, child is afraid to have the stool on account of pain, or pain compels child to desist on first effort; alternating with diarrhea. Associated with that is an enlarged abdomen, frequent enlargement of the liver, abnormal appetite, sleepiness after meals, and a very definite tendency to attacks of colic.
Diarrhea in children: agg during dentition.
In addition to the general surface irritability, these children tend to get very marked irritation of all the orifices – nose, ears, mouth, urethra, anus – any orifice tends to be congested, red, hot and itchy.
Diarrhea tending to come on early in the morning, any time after four a.m. , and the stool is always offensive.
 children is that they have disturbed areas of heat; they have hot heads and cold hands, or hot hands and cold feet; or hot feet and cold heads – very often cold, damp heads – local disturbances of heat and cold as well as general disturbances of heat and cold.
Diarrhea; of infants with pale face, profuse sweating, drowsiness, half open eyes, suppression of urine, spasms of limbs, and the child wakes up screaming.
DISCHARGES:

The other constant  characteristic is an offensive odor. Discharges, eruptions, perspiration all are malodorous, and the  child is very difficult to get clean and wholesome.
The discharge is always excoriating, there is a redness about the nose, with intense irritation, the children tending to pick at it until it is raw and bleeding.
SLEEP:

Another feature often met with in  children is that they are often heavy and lethargic and sleepy during the day, and very sleepless at night; also they are liable to get most terrifying nightmares
Another point which occasionally occurs in a  child. It is quite lively in the evening, slow getting to sleep, gets off to sleep, and wakes up soon after in fits of laughter. Happy dreams, wakes up singing.
PARTICULARS:

Extremely red lips and face, flushing easily.
Ears very red; in children.
Chronic conditions, chronic ear discharge, with the  characteristics, the excoriating, offensive discharge, redness about the external ear, intense irritation; the aggravation of any pain from hot applications, particularly hot fomentation.
In all acute or chronic conditions they tend to have a red coated tongue, with a very red tip, and very often a red margin running along the sides, not unlike a Rhus Tox. Tongue. Most of these  patients have a dry mouth, a hot mouth, and they are thirsty. This applies more in acute conditions than in chronic.
Aphthae of children.
Tenesmus: Burning at the anus; Excoriation about the anus: Soreness in the whole intestines: Pressure in the rectum: Prolapsus ani: Child falls asleep as soon as the tenesmus ceases. Worms.
 children often get chronic tonsillitis, a deeply infected throat, very swollen, feeling very hot, with very offensive breath. And most  children with tonsillitis tend to get masses of glands in the neck – more than ordinary tonsillar gland enlargement and it tends to spread, and involve particularly the submaxillary glands. The tonsillitis is accompanied by irregular heat and cold, shivering attacks, sweaty attacks and thirst for cold water.
Chest conditions in  children vary from a mild bronchitis to an acute pneumonia; and again certain features are constant. A tendency to waves of heat and sweat, very often occasional shivers, very often burning extremities, and a very definite heavy smell about the child.
 is one of the most commonly indicated drugs in jaundice of children – acute catarrhal jaundice – particularly with the marked intolerance that  has to milk in its acute conditions, intense skin irritation, feeling of burning heat on the surface very often with attacks of colic, frequently with attacks of diarrhea. A  diarrhea produces an excoriating discharge, redness and rawness about the buttocks, intense irritation, scratching.
Indicated in urticaria in children, particularly if associated with digestive upsets.
Skin conditions always irritates. It is an intense irritation that they cannot leave alone; they describe it in various ways – itching, feeling of animals crawling over the skin, sensation of stinging nettles, and any description that fits an intense irritation of the skin. Scratch until bleeds. Itching aggravation night.
MODALITIES:

Another constant in the  patient, no matter what the condition, whether it is a skin eruption, or a child with rheumatism, or a child with a tummy upset, no matter what condition, it is aggravated by bathing. And  children nearly always look dirty.
Always aggravated by heat."""]

# Tokenize the new text with the same settings used for the training data and
# wrap it in a Dataset without labels.
new_encodings = tokenizer(new_data, truncation=True, padding=True, max_length=128)
new_dataset = Dataset(new_encodings)

# Score the new data with the Trainer; element 0 of the result is the array of
# raw model outputs (logits).
new_predictions = trainer.predict(new_dataset)[0]

# Logits -> probabilities -> index of the most probable class
new_probabilities = torch.from_numpy(new_predictions).softmax(dim=-1)
new_class_predictions = new_probabilities.argmax(dim=-1).numpy()

# Map the class indices back to the original label names
new_label_predictions = le.inverse_transform(new_class_predictions)

print(new_label_predictions)


['Sulphur']
