Skip to content

Commit

Permalink
Merge pull request #112 from h1alexbel/install-requires
Browse files Browse the repository at this point in the history
feat(#109): prepare fields in input.py, swap text_prediction for rf
  • Loading branch information
h1alexbel committed May 10, 2024
2 parents 36311a7 + a0fa08d commit fef5602
Show file tree
Hide file tree
Showing 9 changed files with 703 additions and 34 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ pipeline/
.factorypath
pyvenv.cfg
.coverage
*.csv
predictions.csv
out.csv
9 changes: 2 additions & 7 deletions src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import typer

from .pre_filter import PreFilter
from .model_map import ModelMap
from .filter_pipe import FilterPipe
from src import NAME, VERSION
Expand Down Expand Up @@ -55,14 +56,8 @@ def filter(
"""
Filter repositories.
"""
PreFilter(out).prepare()
models = ModelMap().build()
# @todo #18:30min Find effective way for processing readme.
# For now we are not processing readme because of
# <a href="https://github.com/h1alexbel/samples-filter/issues/39">this</a>.
# We need to find actual way to process readme too since it can be crucial
# data as model input. Let's study papers, outlined
# <a href="https://github.com/yegor256/cam/issues/227#issue-2200080559">here</a>
# first, rethink it and try to implement here.
FilterPipe(repositories, out, models.get(model), typer).apply()


Expand Down
9 changes: 4 additions & 5 deletions src/feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@ class Feed:
def __init__(self, file):
self.file = file

# @todo #105:60min Process all fields required as inputs.
# We should process all fields required as inputs: full_name, readme,
# created_at, last_commit. In case of transformer we should do it in a
# prompt way, like repository advanced description. Check
# <a href="https://github.com/h1alexbel/samples-filter/issues/75#issuecomment-2094153280">this</a>.
# @todo #109:90min Feed `readme`, `last_commit`, `created_at`, and `commits`.
# We should feed the other important fields too. For now we can feed the
# readme, but the transformer model can't process it since the input tensor
# is too big. Let's resolve that problem and feed the readme.
def read(self):
with open(self.file, "r") as input:
csv.field_size_limit(2 * 1024 * 1024 * 1024)
Expand Down
3 changes: 2 additions & 1 deletion src/filter_pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# SOFTWARE.
import csv

from .input import Input
from .feed import Feed
from .text_prediction import TextPrediction

Expand All @@ -43,7 +44,7 @@ def __init__(self, repos, output, mdl, typer):
def apply(self):
instance = self.model()
self.typer.echo(f"Filtering {self.repos} with {instance.name()}...")
feed = Feed(self.repos).read()
feed = Feed(Input(self.repos).copy()).read()
with open("predictions.csv", "w") as predictions:
writer = csv.DictWriter(
predictions,
Expand Down
17 changes: 0 additions & 17 deletions src/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,39 +39,22 @@ def copy(self):
pipe,
fieldnames=[
"full_name",
"default_branch",
"stars",
"forks",
"created_at",
"size",
"open_issues_count",
"description",
"topics",
"readme"
]
)
writer.writeheader()
for row in reader:
repo = row["full_name"]
branch = row["default_branch"]
stars = row["stars"]
forks = row["forks"]
created = row["created_at"]
size = row["size"]
issues = row["open_issues_count"]
description = row["description"]
topics = row["topics"]
readme = Readme(repo, branch).asText()
out = {
"full_name": repo,
"default_branch": branch,
"stars": stars,
"forks": forks,
"created_at": created,
"size": size,
"open_issues_count": issues,
"description": description,
"topics": topics,
"readme": readme
}
writer.writerow(out)
Expand Down
2 changes: 1 addition & 1 deletion src/text_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, pred, name):

def as_text(self):
if self.model == "rf":
if self.pred == [0]:
if self.pred == [1]:
label = "sample"
else:
label = "real"
Expand Down

3 comments on commit fef5602

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on fef5602 May 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 18-d3f80dfc disappeared from objects/cli.py, which is why I closed #62. Please remember that the puzzle was not necessarily removed in this particular commit. Maybe it happened earlier, but we discovered this fact only now.

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on fef5602 May 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 105-16a41eb4 disappeared from src/feed.py, which is why I closed #109. Please remember that the puzzle was not necessarily removed in this particular commit. Maybe it happened earlier, but we discovered this fact only now.

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on fef5602 May 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 109-cde4ae97 was discovered in src/feed.py and submitted as #113. Please remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

Please sign in to comment.