diff --git a/build/lib/vttformatter/__init__.py b/build/lib/vttformatter/__init__.py index 69ad71f..aeaa49b 100644 --- a/build/lib/vttformatter/__init__.py +++ b/build/lib/vttformatter/__init__.py @@ -1 +1 @@ -__version__ = '1.03' +__version__ = '2.10' diff --git a/build/lib/vttformatter/vttformatter.py b/build/lib/vttformatter/vttformatter.py index 553b299..8b06dba 100644 --- a/build/lib/vttformatter/vttformatter.py +++ b/build/lib/vttformatter/vttformatter.py @@ -1,4 +1,3 @@ -__version__ = '1.0' import numpy as np import os import re @@ -118,6 +117,7 @@ def format_text(self): #initialise a counter to run while it remains less than the length of the message list i=0 while i < len(part_messages)-2: + #print(i, flush = True) #check to see if the start and stop times for subsequent messages are the same, if not append the message to full_messages and increase the counter to check the next line if x[0,i+1] != x[1,i]: full_messages.append(x[2,i]) @@ -125,7 +125,7 @@ def format_text(self): #if the start and stop times are the same initialise an empty string and loop over messages from that point and append them to the string until the start and stop times are no longer consistent else: sentence = '' - while x[0,i+1] == x[1,i]: + while x[0,i+1] == x[1,i] and i+1 < len(part_messages)-2: sentence = sentence + x[2,i] + ' ' i+=1 sentence = sentence + x[2,i] @@ -135,12 +135,21 @@ def format_text(self): #check the last 2 elements of the partial message list and append them to full_messages if x[0,-1] == x[1,-2]: end = x[2,-2] + ' ' + x[2,-1] - full_messages.append(end) + if x[1,-2] == x[1,-3]: + full_messages[-1] = full_messages[-1] + ' ' + end + else: + full_messages.append(end) + + elif x[1,-2] == x[1,-3]: + full_messages[-1] = full_messages[-1] + ' ' + x[2,-2] + full_messages.append(x[2,-1]) + else: full_messages.append(x[2,-2]) full_messages.append(x[2,-1]) #return the list with all the fully combined messages - return part_messages, full_messages + self.full_messages = 
full_messages + return part_messages, self.full_messages def reformat_vtt(self): """create a new .txt file with the same nane as the original .vtt and write each line in the list containing full messages to the file separated by a blank line. """ diff --git a/dist/vttformatter-2.0-py3-none-any.whl b/dist/vttformatter-2.0-py3-none-any.whl new file mode 100644 index 0000000..3cc51e4 Binary files /dev/null and b/dist/vttformatter-2.0-py3-none-any.whl differ diff --git a/dist/vttformatter-2.0.tar.gz b/dist/vttformatter-2.0.tar.gz new file mode 100644 index 0000000..f8486a5 Binary files /dev/null and b/dist/vttformatter-2.0.tar.gz differ diff --git a/dist/vttformatter-2.10-py3-none-any.whl b/dist/vttformatter-2.10-py3-none-any.whl new file mode 100644 index 0000000..211d9fd Binary files /dev/null and b/dist/vttformatter-2.10-py3-none-any.whl differ diff --git a/dist/vttformatter-2.10.tar.gz b/dist/vttformatter-2.10.tar.gz new file mode 100644 index 0000000..c4e4eb9 Binary files /dev/null and b/dist/vttformatter-2.10.tar.gz differ diff --git a/vttformatter.egg-info/PKG-INFO b/vttformatter.egg-info/PKG-INFO index d310127..ce55b2b 100644 --- a/vttformatter.egg-info/PKG-INFO +++ b/vttformatter.egg-info/PKG-INFO @@ -1,16 +1,20 @@ Metadata-Version: 2.1 Name: vttformatter -Version: 1.3 +Version: 2.10 Summary: WEBVTT to text converter Home-page: https://github.com/georgiewellock/VTT_formatter Author: Georgina L. Wellock Author-email: g.l.wellock@bath.ac.uk License: MIT -Download-URL: https://github.com/georgiewellock/VTT_formatter/archive/1.03.tar.gz +Download-URL: https://github.com/georgiewellock/VTT_formatter/archive/2.10.tar.gz Description: # VttFormatter Converts WEBVTT files into text removing timestamps and identifiers and formatting the text into paragraphs. 
+ `VTT_formatter` is a python package that can be executed using python in the command line or through an interface such as a [Jupyter Notebook](https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html) either locally on a machine or using [Azure Notebooks](https://notebooks.azure.com/#). + + Full instructions on using `VTT_formatter` in a Jupyter Notebook, on either Azure Notebooks, or locally using Anaconda can be found on the [wiki](https://github.com/georgiewellock/VTT_formatter/wiki/VTT_formatter-using-Jupyter-Notebooks). + ## Example Input/Output ### Input @@ -76,6 +80,13 @@ Description: # VttFormatter it is crackling. It will still be recording the audio. ``` + ## Simple usage + + The screenshot below shows the simple implementation of the VTT formatter in a jupyter notebook. This will read in the file defined and create a new `.txt` file in the same directory as the original. + + + Further information can be found in the notebook [here](https://github.com/georgiewellock/VTT_formatter/blob/master/VTT_formatter.ipynb) + ## Installation The simplest way to install this vttformatter is to use `pip` to install from [PyPI](https://pypi.org/project/vttformatter/) diff --git a/vttformatter/vttformatter.py b/vttformatter/vttformatter.py index a884591..82e8191 100644 --- a/vttformatter/vttformatter.py +++ b/vttformatter/vttformatter.py @@ -124,7 +124,6 @@ def format_text(self): else: sentence = '' while x[0,i+1] == x[1,i] and i+1 < len(part_messages)-2: - print(i, flush = True) sentence = sentence + x[2,i] + ' ' i+=1 sentence = sentence + x[2,i] @@ -134,7 +133,15 @@ def format_text(self): #check the last 2 elements of the partial message list and append them to full_messages if x[0,-1] == x[1,-2]: end = x[2,-2] + ' ' + x[2,-1] - full_messages.append(end) + if x[1,-2] == x[1,-3]: + full_messages[-1] = full_messages[-1] + ' ' + end + else: + full_messages.append(end) + + elif x[1,-2] == x[1,-3]: + full_messages[-1] = full_messages[-1] + ' 
' + x[2,-2] + full_messages.append(x[2,-1]) + else: full_messages.append(x[2,-2]) full_messages.append(x[2,-1])