Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,27 @@ dependencies = [
[project.optional-dependencies]

# dev - the developer dependency set, for contributors to harmony
dev = ["check-manifest", "pytest", "matplotlib"]
dev = ["check-manifest", "pytest", "matplotlib", "ruff"]

[tool.ruff]
target-version = "py310"
line-length = 120
extend-exclude = ["update.ipynb", "Harmony_example_walkthrough.ipynb"]

[tool.ruff.lint]
# Pragmatic baseline for a legacy research codebase:
# enable Pyflakes (F: real bugs) plus E9 (syntax and I/O errors), and skip
# stylistic rules that would require touching many files for no functional gain.
select = ["F", "E9"]
ignore = [
"F403", # `from module import *` — used intentionally in __init__.py
"F405", # `*`-import undefined names — paired with F403
"F841", # unused local — common in legacy code, low value to fix now
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"] # re-exports
"tests/*" = ["F401"]

[project.urls]
"Documentation" = "https://harmonydata.ac.uk/frequently-asked-questions/"
Expand Down
2 changes: 0 additions & 2 deletions src/harmony/matching/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@

from harmony.matching.default_matcher import convert_texts_to_vector
from harmony.schemas.requests.text import Question
from harmony.schemas.responses.text import HarmonyCluster

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from harmony.matching.deterministic_clustering import find_clusters_deterministic

Expand Down
6 changes: 0 additions & 6 deletions src/harmony/matching/kmeans_clustering.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
import sys
from typing import List

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

from harmony.matching.generate_cluster_topics import generate_cluster_topics
from harmony.schemas.requests.text import Question
from harmony.schemas.responses.text import HarmonyCluster

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def perform_kmeans(embeddings_in, num_clusters=5):
Expand Down
1 change: 0 additions & 1 deletion src/harmony/matching/wmd_matcher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from wmd import WMD
import numpy as np
import math
import libwmdrelax
Expand Down
2 changes: 1 addition & 1 deletion src/harmony/parsing/html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

# Try to import lxml for better performance, fall back to html.parser
try:
import lxml
import lxml # noqa: F401 # availability probe
DEFAULT_PARSER = 'lxml'
except ImportError:
DEFAULT_PARSER = 'html.parser'
Expand Down
4 changes: 1 addition & 3 deletions src/harmony/schemas/requests/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,12 @@
'''

import uuid
from typing import List, Optional
from typing import Any, Dict, List, Optional
from pydantic import ConfigDict, BaseModel, Field
from harmony.schemas.catalogue_instrument import CatalogueInstrument
from harmony.schemas.catalogue_question import CatalogueQuestion
from harmony.schemas.enums.file_types import FileType
from harmony.schemas.enums.languages import Language
from pydantic import ConfigDict, BaseModel, Field
from typing import Any, Dict, List, Optional

DEFAULT_FRAMEWORK = "huggingface"
DEFAULT_MODEL = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
Expand Down
3 changes: 1 addition & 2 deletions src/harmony/services/export_pdf_report.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import io
from datetime import datetime
from typing import List, Optional, Tuple
from typing import List, Tuple
import tempfile
from fpdf import FPDF

Expand Down
2 changes: 1 addition & 1 deletion src/harmony/util/url_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def _validate_url(self, url: str) -> None:
parsed = urllib.parse.urlparse(url)

if parsed.scheme not in ALLOWED_SCHEMES:
raise BadRequestError(f"URL must use HTTPS")
raise BadRequestError("URL must use HTTPS")

if not parsed.netloc or '.' not in parsed.netloc:
raise BadRequestError("Invalid domain")
Expand Down
Loading