# Checking Paper Requirements

The required package versions are listed in `requirements.txt` and relate to the WebFormer 2022 paper. The cells below check that each installed package is at least the required version.

In [18]:
# Required package versions, one `ppr_v_<package>` constant per dependency.
# Each value is later passed to `check_version` as the required version.
ppr_v_datasets = "1.1.3"
ppr_v_numpy = "1.19.2"
ppr_v_pandas = "1.1.5"
ppr_v_pytorch = "1.6"  # compared against `torch.__version__`
ppr_v_pytrec_eval = "0.5"  # the matching check is currently commented out
ppr_v_requests = "2.25.1"
ppr_v_scipy = "1.5.4"
ppr_v_tqdm = "4.64.1"
ppr_v_transformers = "4.2.0"

In [21]:
import datasets
import numpy
import pandas
import torch
# import pytrec_eval
import requests
import scipy
import tqdm
import transformers
from packaging import version  # Import version from the packaging module

def check_version(package_name, installed_version, required_version):
    """Print whether an installed package meets its minimum required version.

    Both version strings are parsed with ``packaging.version.parse`` and the
    parsed objects are compared, so the check is a version comparison rather
    than a plain string comparison.

    Args:
        package_name: Label used for the package in the printed report.
        installed_version: Version string of the installed package (the calls
            in this notebook pass ``<module>.__version__``).
        required_version: Minimum acceptable version string.

    Returns:
        None. The outcome is only reported on stdout.
    """
    installed = version.parse(installed_version)
    required = version.parse(required_version)

    # Anything equal to or newer than the requirement passes; only older
    # installs are reported as outdated.
    if installed >= required:
        print(f'{package_name} [PASSED]\ninstalled {installed} [required {required}]\n')
    else:
        # Fixed typo: this message previously read "iinstalled".
        print(f'[OUTDATED] {package_name}\ninstalled {installed} [required {required}]\n')


# One row per dependency: (label to print, imported module, required version).
# Rows are checked in this order, so the report order is unchanged.
version_check_table = [
    ('datasets', datasets, ppr_v_datasets),
    ('numpy', numpy, ppr_v_numpy),
    ('pandas', pandas, ppr_v_pandas),
    ('torch', torch, ppr_v_pytorch),
    # ('pytrec_eval', pytrec_eval, ppr_v_pytrec_eval),  # disabled: import is commented out
    ('requests', requests, ppr_v_requests),
    ('scipy', scipy, ppr_v_scipy),
    ('tqdm', tqdm, ppr_v_tqdm),
    ('transformers', transformers, ppr_v_transformers),
]

for pkg_label, pkg_module, pkg_required in version_check_table:
    # Read `__version__` per iteration so each package is reported before the
    # next module is inspected.
    check_version(pkg_label, pkg_module.__version__, pkg_required)

datasets [PASSED]
installed 2.15.0 [required 1.1.3]

numpy [PASSED]
installed 1.26.2 [required 1.19.2]

pandas [PASSED]
installed 2.0.3 [required 1.1.5]

torch [PASSED]
installed 2.1.1+cu121 [required 1.6]

requests [PASSED]
installed 2.31.0 [required 2.25.1]

scipy [PASSED]
installed 1.11.1 [required 1.5.4]

tqdm [PASSED]
installed 4.66.1 [required 4.64.1]

transformers [PASSED]
installed 4.36.0.dev0 [required 4.2.0]



## Download BERT_base_uncased

In [None]:
import os

from transformers import AutoModel, AutoTokenizer

# Hugging Face Hub identifier of the checkpoint to fetch.
model_name = "bert-base-uncased"

# Local directory that will receive the saved model and tokenizer files.
output_folder = "./bert_base_uncased"

# Make sure the target directory exists before anything is downloaded.
os.makedirs(output_folder, exist_ok=True)

# Fetch the pretrained weights and the matching tokenizer.
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Write both artifacts into the same local directory.
model.save_pretrained(output_folder)
tokenizer.save_pretrained(output_folder)

print(f"Model '{model_name}' downloaded & saved to '{output_folder}'.")

## Create Data Folder

In [22]:
import os

def create_directories(directories=None):
    """Create the data directories, reporting the outcome for each one.

    For every path, prints either ``Directory '<path>' created.`` or
    ``Directory '<path>' already exists.``. Missing parent directories are
    created as well (``os.makedirs``).

    Args:
        directories: Optional iterable of directory paths to create. When
            omitted (``None``), the three ``Preprocess/data/...`` directories
            used by this notebook are created.

    Returns:
        None.
    """
    if directories is None:
        # Default layout; paths are relative to the current working directory.
        directories = [
            "Preprocess/data/endata",
            "Preprocess/data/endata_new_clean",
            "Preprocess/data/training_data",
        ]

    for directory in directories:
        if not os.path.exists(directory):
            os.makedirs(directory)
            print(f"Directory '{directory}' created.")
        else:
            print(f"Directory '{directory}' already exists.")


# Runs when the cell is executed as the main module (the recorded cell output
# shows that it does).
if __name__ == "__main__":
    create_directories()


Directory 'Preprocess/data/endata' already exists.
Directory 'Preprocess/data/endata_new_clean' already exists.
Directory 'Preprocess/data/training_data' already exists.
