The model card template makes use of Jinja, hence we need to install the necessary package.

In [1]:
!pip install Jinja2



Required import statement

In [2]:
from huggingface_hub import ModelCard, ModelCardData

  from .autonotebook import tqdm as notebook_tqdm


Before running the cell below, upload the model card template (`COMP34812_modelcard_template.md`) provided to you using the Colab file browser (on the left-hand side).

In [3]:
# Metadata passed to ModelCard.from_template below as `card_data`.
card_data = ModelCardData(
    language='en',
    license='cc-by-4.0',
    tags=['text-classification'],
    # Point this at your own GitHub/GitLab repository.
    repo="https://github.com/Zezoo123/nlu_group1",
    ignore_metadata_errors=True,
)

# Render the model card by filling the Jinja template with the values below.
# Every keyword other than `card_data` and `template_path` is passed to the
# template as a variable.
card = ModelCard.from_template(
    card_data=card_data,
    template_path='COMP34812_modelcard_template.md',
    model_id='p45493za-d99547jh-nli',

    # --- Overview -----------------------------------------------------------
    model_summary='''This model performs natural language inference by predicting the relationship
      between a pair of texts (premise and hypothesis) using a combination of SBERT and TF-IDF embeddings.''',

    model_description='''This model combines Sentence-BERT (MiniLM) embeddings with TF-IDF features to
      represent input text pairs. These combined features are used to train a logistic regression classifier
      to identify relationships between premise-hypothesis pairs''',

    developers='Joseph Hayes & Zeyad Awadalla',
    base_model_repo='https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2',
    base_model_paper='https://arxiv.org/abs/1908.10084',
    model_type='Supervised',
    # The description and hyperparameters (C, solver=liblinear) describe a
    # logistic regression classifier, so the architecture field must say the
    # same; it previously read "SVM", which contradicted the rest of the card.
    # NOTE(review): confirm against the training notebook.
    model_architecture='Logistic Regression (SBERT + TF-IDF Features)',
    language='English',
    base_model='sentence-transformers/all-MiniLM-L6-v2',

    # --- Training -----------------------------------------------------------
    training_data='Data was provided to us by the lecturer',
    hyperparameters='''
      - C (logistic regression): 0.1 (tuned via GridSearchCV)
      - solver: liblinear
      - max_iter: 1000
      - SBERT model: all-MiniLM-L6-v2
      - TF-IDF: binary=True, ngram_range=(1,3), min_df=3
      - combined features: [SBERT | TF-IDF]
    ''',
    speeds_sizes_times='''
      - overall training time: ~5 minutes
      - model size: ~127KB (SBERT) + vectorized features
      - grid search folds: 3
    ''',

    # --- Evaluation ---------------------------------------------------------
    testing_data='Data was provided to us by the lecturer',
    testing_metrics='''
      - Accuracy
      - Precision (Macro & Weighted)
      - Recall (Macro & Weighted)
      - F1-score (Macro & Weighted)
      - Matthews Correlation Coefficient (MCC)
    ''',
    # Plain string: there are no placeholders, so no f-prefix is needed.
    results='''
      - Accuracy: 0.6317
      - Macro-F1: 0.6306
      - Weighted F1: 0.6313
      - MCC: 0.2618
    ''',

    # --- Technical requirements ---------------------------------------------
    hardware_requirements='''
      - RAM: 8–16 GB
      - GPU: Not required
      - Storage: ~200MB for model + data
    ''',
    software='''
      - sentence-transformers
      - scikit-learn
      - nltk
      - pandas, numpy, tqdm
    ''',

    # --- Caveats ------------------------------------------------------------
    bias_risks_limitations='''Model relies on pretrained SBERT embeddings and TF-IDF statistics,
      which may reflect biases present in the original corpora.''',
    additional_information='''The combination of semantic embeddings and statistical features
      provided improved performance over the baseline. Hyperparameters were selected using GridSearchCV.'''
)

# Write the rendered card to a markdown (.md) file; this becomes one of your model cards.
# Change the filename accordingly.
# The encoding is set explicitly because the card text contains non-ASCII
# characters (e.g. the en dash in "8–16 GB"); without it the bytes written
# depend on the platform's default encoding.
with open('svm_model_card.md', 'w', encoding='utf-8') as model_card:
  model_card.write(card.content)

Repo card metadata block was not found. Setting CardData to empty.
