-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
250 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Text.RegularExpressions; | ||
using System.Threading.Tasks; | ||
using WatchWord.Domain.Entity; | ||
|
||
namespace WatchWord.Service | ||
{ | ||
/// <summary>Represents logic for parsing words in the files or streams.</summary> | ||
public class ScanWordParser | ||
{ | ||
/// <summary>Scans the unique words in the StreamReader of the file.</summary> | ||
/// <param name="material">Material entity.</param> | ||
/// <param name="stream">Stream reader for the text file.</param> | ||
/// <returns>Unsorted collection of words in file.</returns> | ||
public List<Word> ParseUnigueWordsInFile(Material material, StreamReader stream) | ||
{ | ||
return ParseFile(material, stream, TypeResult.OnlyUniqueWordsInFile).Words; | ||
} | ||
|
||
/// <summary>Scans all the words and their positions in the StreamReader of the file.</summary> | ||
/// <param name="material">Material entity.</param> | ||
/// <param name="stream">Stream reader for the text file.</param> | ||
/// <returns>Unsorted collection of word compositions in file.</returns> | ||
public List<Composition> ParseAllWordsInFile(Material material, StreamReader stream) | ||
{ | ||
return ParseFile(material, stream, TypeResult.CompositionOfWords).Compositions; | ||
} | ||
|
||
/// <summary>Scans the location of words in the StreamReader of the file.</summary> | ||
/// <param name="material">Material entity.</param> | ||
/// <param name="stream">Stream reader for the text file.</param> | ||
/// <param name="type">Type of result.</param> | ||
private static ScanResult ParseFile(Material material, TextReader stream, TypeResult type) | ||
{ | ||
var pattern = new Regex(@"[^\W_\d]([^\W_\d]|[-’'](?=[^\W_\d]))*([^\W_\d]|['’])?"); | ||
|
||
var wordsLocker = new object(); | ||
var compositionsLocker = new object(); | ||
|
||
var fileWords = new List<Word>(); | ||
var compositions = new List<Composition>(); | ||
var lines = new Dictionary<int, string>(); | ||
|
||
string currentLine; | ||
var counter = 1; | ||
while ((currentLine = stream.ReadLine()) != null) | ||
{ | ||
lines.Add(counter, currentLine.ToLower()); | ||
counter++; | ||
} | ||
|
||
Parallel.ForEach( | ||
lines, | ||
line => | ||
{ | ||
var words = pattern.Matches(line.Value); | ||
for (var i = 0; i < words.Count; i++) | ||
{ | ||
var scanWord = GetOrCreateScanWord(wordsLocker, fileWords, material, words[i].Value); | ||
if (type == TypeResult.CompositionOfWords) | ||
{ | ||
AddWordToCompositions(compositionsLocker, compositions, scanWord, line.Key, words[i].Index + 1); | ||
} | ||
} | ||
}); | ||
|
||
return new ScanResult { Words = fileWords, Compositions = compositions }; | ||
} | ||
|
||
/// <summary>Adds word info to collection of compositions.</summary> | ||
/// <param name="compositionsLocker">Mutex for adding compositions.</param> | ||
/// <param name="compositions">The collection of word compositions.</param> | ||
/// <param name="scanWord">Word entity.</param> | ||
/// <param name="line">Serial number of the line that contains the word.</param> | ||
/// <param name="column">Position of the first character in word, from the beginning of the line.</param> | ||
private static void AddWordToCompositions( | ||
object compositionsLocker, | ||
ICollection<Composition> compositions, | ||
Word scanWord, | ||
int line, | ||
int column) | ||
{ | ||
|
||
var composition = new Composition { Word = scanWord, Line = line, Сolumn = column }; | ||
|
||
lock (compositionsLocker) | ||
{ | ||
compositions.Add(composition); | ||
} | ||
} | ||
|
||
/// <summary>Gets or creates word entity using the word string.</summary> | ||
/// <param name="wordsLocker">Mutex for adding words.</param> | ||
/// <param name="fileWords">Existing words to compare.</param> | ||
/// <param name="material">Material containing this word.</param> | ||
/// <param name="wordText">The word string.</param> | ||
/// <returns>The <see cref="Word"/> entity.</returns> | ||
private static Word GetOrCreateScanWord(object wordsLocker, ICollection<Word> fileWords, Material material, string wordText) | ||
{ | ||
Word word; | ||
lock (wordsLocker) | ||
{ | ||
word = fileWords.FirstOrDefault(w => w.TheWord == wordText); | ||
|
||
if (!Equals(word, default(Word))) | ||
{ | ||
word.Count++; | ||
return word; | ||
} | ||
|
||
word = new Word { Material = material, TheWord = wordText, Count = 1 }; | ||
fileWords.Add(word); | ||
} | ||
|
||
return word; | ||
} | ||
|
||
/// <summary>Nested type of scan result.</summary> | ||
private class ScanResult | ||
{ | ||
/// <summary>Gets or sets unsorted collection of words in the file.</summary> | ||
public List<Word> Words { get; set; } | ||
|
||
/// <summary>Gets or sets unsorted collection of word compositions in the file.</summary> | ||
public List<Composition> Compositions { get; set; } | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
using System.IO; | ||
using WatchWord.Service; | ||
using System.Linq; | ||
using WatchWord.Domain.Entity; | ||
using Microsoft.AspNetCore.Mvc; | ||
using Microsoft.AspNetCore.Http; | ||
using System.Collections.Generic; | ||
using WatchWord.Infrastructure; | ||
using Microsoft.AspNetCore.Authorization; | ||
|
||
namespace WatchWord.Controllers | ||
{ | ||
[Route("api/[controller]")] | ||
public class ParseController : Controller | ||
{ | ||
private readonly ScanWordParser _parser; | ||
|
||
public ParseController() | ||
{ | ||
_parser = new ScanWordParser(); | ||
} | ||
|
||
[HttpPost] | ||
[Authorize] | ||
[Route("File")] | ||
public string File(IFormFile file) | ||
{ | ||
var responseModel = new ParseResponseModel(); | ||
if (file.Length > 35000000) | ||
{ | ||
responseModel.Succeeded = false; | ||
responseModel.Errors.Add("Subtitles file too big!"); | ||
} | ||
else if (file.Length > 0) | ||
{ | ||
var stream = file.OpenReadStream(); | ||
var words = _parser.ParseUnigueWordsInFile(new Material(), new StreamReader(stream)); | ||
var wordsList = words.Select(word => word.TheWord).ToList(); | ||
|
||
if (wordsList.Count > 0) | ||
{ | ||
responseModel.Succeeded = true; | ||
responseModel.Words = wordsList; | ||
} | ||
else | ||
{ | ||
responseModel.Succeeded = false; | ||
responseModel.Errors.Add("Empty subtitles file!"); | ||
} | ||
} | ||
else | ||
{ | ||
responseModel.Succeeded = false; | ||
responseModel.Errors.Add("Empty subtitles file!"); | ||
} | ||
|
||
return ApiJsonSerializer.Serialize(responseModel); | ||
} | ||
|
||
public class ParseResponseModel | ||
{ | ||
public bool Succeeded { get; set; } | ||
public List<string> Words { get; set; } | ||
public List<string> Errors { get; set; } | ||
|
||
public ParseResponseModel() | ||
{ | ||
Words = new List<string>(); | ||
Errors = new List<string>(); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import { Injectable } from "@angular/core"; | ||
import { Observable } from "rxjs/Observable"; | ||
import { Http, Response } from "@angular/http"; | ||
import { ParseResponseModel } from "./material.models"; | ||
import "rxjs/add/operator/map"; | ||
let cfg = require('../config').appConfig; | ||
|
||
/**
 * Service used when creating a material: sends a subtitles file to the
 * backend parse endpoint and exposes the parsed response.
 */
@Injectable()
export class CreateMaterialService {
    /** Root of the backend API, taken from the application config. */
    private baseUrl: string;

    constructor(private http: Http) {
        this.baseUrl = cfg.apiRoute;
    }

    /**
     * Uploads a subtitles file for parsing.
     *
     * @param subtitlesFile Value appended to the form data under the "file" field.
     * @returns Observable emitting the response body decoded with `res.json()`.
     */
    parseSubtitles(subtitlesFile: any): Observable<ParseResponseModel> {
        const input = new FormData();
        input.append("file", subtitlesFile);

        return this.http
            .post(this.baseUrl + "/parse/file", input)
            .map((res: Response) => res.json());
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters