In [25]:
import cv2
import pytesseract
import numpy as np

# Funkcja do przetwarzania obrazu i wyodrębnienia tekstu z komórek
def extract_text_from_cells(image_path):
    # Wczytaj obraz
    image = cv2.imread(image_path)

    # Konwersja obrazu do skali szarości
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Użycie thresholdingu do binarizacji obrazu
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)

    # Operacje morfologiczne do podkreślenia struktur prostokątnych
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Znalezienie konturów
    contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Lista do przechowywania wyników
    results = []

    # Przetworzenie każdego konturu
    for cnt in contours:
        
        x, y, w, h = cv2.boundingRect(cnt)
        print(x)
        # Filtruj małe kontury, które mogą nie być komórkami
        if w > 5 and h > 5:
            print(['w:',w])
            # Wyciągnięcie fragmentu obrazu, który reprezentuje komórkę
            cell = image[y:y+h, x:x+w]
            
            # OCR na wyodrębnionym fragmencie
            text = pytesseract.image_to_string(cell, config='--psm 3' ).strip()
            print(['text',text])
            # Sprawdzenie, czy tekst zawiera numer pozycji i kwotę
            if text:
                # Zakładamy, że numer pozycji jest na początku, a kwota na końcu tekstu
                parts = text.split('\n')
                if len(parts) >= 1:
                    position_number = parts[0].strip()
                    amount = parts[-1].strip()
                    
                    # Dodatkowe filtrowanie, aby upewnić się, że numer pozycji to liczba
                    #if position_number.isdigit():
                    results.append((position_number, amount))
                else:
                    results.append((None, text))
                
    # Sortowanie wyników na podstawie pozycji x, y (jeśli dostępny numer pozycji)
    results.sort(key=lambda x: (x[0] if x[0] else float('inf'), x[1]))

    return results

# Wczytaj i przetwórz obraz
image_path = r"image/test.jpg"
results = extract_text_from_cells(image_path)

# Wyświetlenie wyników
for result in results:
    print(f'Pozycja: {result[0]}, Kwota: {result[1]}')

192
178
100
['w:', 75]
['text', '']
35
['w:', 60]
['text', '']
605
830
['w:', 27]
['text', '']
19
['w:', 805]
['text', '7 aa sare es reemo Mare 7 Eo 7 ca\n=\na2. | Cs\n== = ca =\nta %\njesse TE *14s600.00/"" 4416872] 102.431,28] 10455\nD.2. DOCHODY | STRATY MALZONKA\nEEE SY CEO DENY, a e Cy Te.']
0
['w:', 13]
['text', '']
Pozycja: 7 aa sare es reemo Mare 7 Eo 7 ca, Kwota: EEE SY CEO DENY, a e Cy Te.


In [35]:
import cv2
import numpy as np
import pytesseract

def extract_cells(image_path):
    # Wczytaj obraz
    image = cv2.imread(image_path)
    
    # Konwersja do skali szarości
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Adaptive Thresholding dla wyraźnego binarnego obrazu
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 15, 10)

    # Detekcja linii poziomych
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 1))
    detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    
    # Detekcja linii pionowych
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 10))
    detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)

    # Łączenie wykrytych linii, aby uzyskać pełne granice komórek
    grid = cv2.add(detect_horizontal, detect_vertical)

    # Znalezienie konturów komórek
    contours, _ = cv2.findContours(grid, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Lista do przechowywania wyników
    results = []
    i = 0
    for cnt in contours:
        i = i + 1
        x, y, w, h = cv2.boundingRect(cnt)

        # Filtracja bardzo małych konturów
        if w > 5 and h > 5:
            # Wycięcie komórki z obrazu
            cell = image[y:y+h, x:x+w]

            cv2.imwrite(f'image/div/p{i}.jpg',cell)
            
            # OCR na wyodrębnionej komórce
            text = pytesseract.image_to_string(cell, config='--psm 3').strip()
            print(['text',text])
            if text:
                # Zakładamy, że numer pozycji jest na początku, a kwota na końcu tekstu
                parts = text.split('\n')
                if len(parts) >= 2:
                    position_number = parts[0].strip()
                    amount = parts[-1].strip()
                    results.append((position_number, amount))
                else:
                    results.append((None, text))
    
    # Sortowanie wyników na podstawie pozycji x, y
    results.sort(key=lambda x: (x[0] if x[0] else float('inf'), x[1]))

    return results

# Wczytaj i przetwórz obraz
image_path = r"image/test.jpg"
results = extract_cells(image_path)

# Wyświetlenie wyników
for result in results:
    print(f'Pozycja: {result[0]}, Kwota: {result[1]}')


['text', '7 Pras are ie paw o ae 7 Es 7 7\npar7snaeey mtane prchody.s Fae =\n‘a ba 8) pots stofe Soh\neae |\na2. |*\n= oa =\n; 146600,00/"" —-44168,72|* — 102.431,28 10455\nSuna wo eyo 30 : : 28]\nD.2. DOCHODY | STRATY MALZONKA\nEEE SY CEO DENY, a e Cy Te.']
Pozycja: 7 Pras are ie paw o ae 7 Es 7 7, Kwota: EEE SY CEO DENY, a e Cy Te.


In [41]:
import cv2
import numpy as np
import pytesseract

def extract_cells(image_path):
    # Wczytaj obraz
    image = cv2.imread(image_path)
    
    # Konwersja do skali szarości
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Adaptive Thresholding dla wyraźnego binarnego obrazu
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    
    # Detekcja linii poziomych
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
    detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    
    # Detekcja linii pionowych
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
    detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
    
    # Łączenie linii pionowych i poziomych, aby uzyskać pełną siatkę
    grid = cv2.addWeighted(detect_horizontal, 0.5, detect_vertical, 0.5, 0.0)
    
    # Wypełnianie dziur w siatce tabeli
    grid = cv2.dilate(grid, np.ones((3, 3), np.uint8))
    
    # Znalezienie konturów komórek
    contours, _ = cv2.findContours(grid, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    # Lista do przechowywania wyników
    results = []
    i = 0
    for cnt in contours:
        i = i + 1
        x, y, w, h = cv2.boundingRect(cnt)

        # Filtracja bardzo małych konturów
        if w > 5 and h > 5:
            # Wycięcie komórki z obrazu
            cell = image[y:y+h, x:x+w]
            cv2.imwrite(f'image/div/c{i}.jpg',cell)
            # OCR na wyodrębnionej komórce
            text = pytesseract.image_to_string(cell, config='--psm 3').strip()
            print(['text',text])
            if text:
                # Zakładamy, że numer pozycji jest na początku, a kwota na końcu tekstu
                parts = text.split('\n')
                if len(parts) >= 2:
                    position_number = parts[0].strip()
                    amount = parts[-1].strip()
                    results.append((position_number, amount))
                else:
                    results.append((None, text))
    
    # Sortowanie wyników na podstawie pozycji x, y
    #results.sort(key=lambda x: (x[0] if x[0] else float('inf'), x[1]))

    return results

# Wczytaj i przetwórz obraz
image_path = r"image/test.jpg"
results = extract_cells(image_path)

# Wyświetlenie wyników
for result in results:
    print(f'Pozycja: {result[0]}, Kwota: {result[1]}')


['text', '']
['text', '7 Pras are ie paw o ae 7 Eo 7 7\npa 73 naezy mane savchody.do Fae =\na poe 80 poate sate Soh\nBaten |\na2. 3 |*\n= ca =\njesse TE 146600,00]"" 4416872" 102.431,28] 10455\nD.2. DOCHODY | STRATY MALZONKA\nEEE SY CEO DENY, a e Cy Te.']
['text', 'D.2. DOCHODY | STRATY MALZONKA']
['text', '10455']
['text', '']
['text', '“102. 431,28']
['text', '“4416872']
['text', '146 600,00']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '“fat Wut ieee ducts']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', '']
['text', 'm']
['text', '). Prame euharehte | tm pram, © Reryen\n" fpows wart 18 uctawy\n\nW50n73 ney stam seschoey 2\nsyn w Daz 9 pocatr safe So\n\noe pel']
['text', '']
Pozycja: 7 Pras are ie paw o ae 7 Eo 7 7, Kwota: EEE SY CEO DENY, a e Cy Te.
Pozycja: None, Kwota: D.2. DOCHODY | STRATY MALZONKA
Pozycja: None, Kwota: 10455
Pozycja

In [36]:
import cv2
import pytesseract

image = cv2.imread(r'image/div/c8.jpg', 0)
#thresh = cv2.threshold(image, 220, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

data = pytesseract.image_to_string(image, lang='eng',config='--oem 1 --psm 6')
print(data)


Fi 146 600,00



In [43]:
import cv2
import pytesseract

def extract_numbers_from_image(image_path):
    """
    Wyciąga dwie liczby z obrazka: małą z lewego górnego rogu i dużą z prawego dolnego.

    Args:
        image_path (str): Ścieżka do obrazka.

    Returns:
        tuple: Krotka zawierająca dwie liczby (mała, duża).
    """

    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Znajdź kontury
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Posortuj kontury według pozycji (od lewej do prawej, od góry do dołu)
    contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0] + cv2.boundingRect(ctr)[1] * img.shape[1])

    # Wyodrębnij tekst z pierwszego i ostatniego konturu
    small_number_roi = cv2.boundingRect(contours[0])
    large_number_roi = cv2.boundingRect(contours[-1])

    small_number_text = pytesseract.image_to_string(thresh[small_number_roi[1]:small_number_roi[1]+small_number_roi[3], 
                                                          small_number_roi[0]:small_number_roi[0]+small_number_roi[2]], 
                                                    config='--psm 6')
    large_number_text = pytesseract.image_to_string(thresh[large_number_roi[1]:large_number_roi[1]+large_number_roi[3], 
                                                          large_number_roi[0]:large_number_roi[0]+large_number_roi[2]], 
                                                    config='--psm 6')

    # Przekształć tekst na liczby
    small_number = small_number_text.strip()
    large_number = large_number_text.strip().replace(",", ".")  # Zamień przecinek na kropkę

    return small_number, large_number

# Przykład użycia
image_path = r'image/div/c8.jpg'  # Zastąp poprawną ścieżką
small_number, large_number = extract_numbers_from_image(image_path)
print("Mała liczba:", small_number)
print("Duża liczba:", large_number)

Mała liczba: 
Duża liczba: 


In [75]:
import cv2
import pytesseract

def extract_numbers(image_path):
    # Wczytaj obraz
    image = cv2.imread(image_path)
    
    # Pobierz wymiary obrazu
    h, w, _ = image.shape
    
    # Wyodrębnienie regionu w lewym górnym rogu (przyjmujemy 20% szerokości i wysokości)
    roi_top_left = image[0:int(h*0.5), 0:int(w*0.3)]
    
    # Wyodrębnienie regionu w prawym dolnym rogu (przyjmujemy 20% szerokości i wysokości)
    roi_bottom_right = image[int(h*0):h, int(w*0):w]
    cv2.imwrite(f'image/div/c8t.jpg',roi_top_left)
    # Zastosowanie OCR do regionu lewego górnego
    number_top_left = pytesseract.image_to_string(roi_top_left, config='--psm 11').strip()
    
    # Zastosowanie OCR do regionu prawego dolnego
    number_bottom_right = pytesseract.image_to_string(roi_bottom_right, config='--psm 3').strip()
    
    return number_top_left, number_bottom_right

# Ścieżka do obrazu
image_path = r'image/div/c8.jpg'

# Wyodrębnij liczby
number_top_left, number_bottom_right = extract_numbers(image_path)

# Wyświetlenie wyników
print(f'Liczba w lewym górnym rogu: {number_top_left}')
print(f'Liczba w prawym dolnym rogu: {number_bottom_right}')

Liczba w lewym górnym rogu: 
Liczba w prawym dolnym rogu: 146 600,00


In [82]:
import cv2
import pytesseract

def preprocess_image(image):
    # Zwiększenie rozdzielczości obrazu (skalowanie w górę)
    #image = cv2.resize(image, None, fx=5, fy=5, interpolation=cv2.INTER_CUBIC)
    
    # Konwersja do skali szarości
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Wzmocnienie kontrastu (zastosowanie rozciągania histogramu)
    #gray = cv2.equalizeHist(gray)
    
    # Usunięcie szumów
    #gray = cv2.medianBlur(gray, 3)
    
    # Binaryzacja (zmiana na obraz czarno-biały)
    binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    
    return binary

def extract_numbers(image_path):
    # Wczytaj obraz
    image = cv2.imread(image_path)
    
    # Pobierz wymiary obrazu
    h, w, _ = image.shape
    
    # Wyodrębnienie regionu w lewym górnym rogu (przyjmujemy 20% szerokości i wysokości)
    roi_top_left = image[0:int(h*0.5), 0:int(w*0.3)]
    
    # Wyodrębnienie regionu w prawym dolnym rogu (przyjmujemy 20% szerokości i wysokości)
    roi_bottom_right = image[int(h*0.0):h, int(w*0.0):w]
    
    # Przetwarzanie obrazu przed OCR
    processed_top_left = preprocess_image(roi_top_left)
    processed_bottom_right = preprocess_image(roi_bottom_right)
    
    # Zastosowanie OCR do regionu lewego górnego
    number_top_left = pytesseract.image_to_string(processed_top_left, config='--psm 3').strip()
    
    # Zastosowanie OCR do regionu prawego dolnego
    number_bottom_right = pytesseract.image_to_string(processed_bottom_right, config='--psm 0').strip()
    
    return number_top_left, number_bottom_right

# Ścieżka do obrazu
image_path = r'image/div/c8t.jpg'

# Wyodrębnij liczby
number_top_left, number_bottom_right = extract_numbers(image_path)

# Wyświetlenie wyników
print(f'Liczba w lewym górnym rogu: {number_top_left}')
print(f'Liczba w prawym dolnym rogu: {number_bottom_right}')


TesseractError: (1, 'Warning. Invalid resolution 0 dpi. Using 70 instead. Too few characters. Skipping this page Error during processing.')

In [19]:
import cv2
import numpy as np
import pytesseract

def upscale_image(image, scale=0.5):
    # Zwiększenie rozdzielczości przy użyciu interpolacji Lanczosa
    return cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4)

def preprocess_image(image):
    # Konwersja do skali szarości
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Zastosowanie filtru Canny do wykrywania krawędzi
    edges = cv2.Canny(gray, 100, 150, apertureSize=3)
    
    # Rozszerzenie krawędzi (dylacja)
    edges = cv2.dilate(edges, None, iterations=1)
    
    # Wypełnienie krawędzi (erozja)
    edges = cv2.erode(edges, None, iterations=1)
    
    # Odszumianie
    #edges = cv2.GaussianBlur(edges, (2, 2), 0)
    
    return edges

def extract_numbers(image_path):
    # Wczytaj obraz
    image = cv2.imread(image_path)
    
    # Zwiększenie rozdzielczości obrazu
    image_upscaled = upscale_image(image,0.5)
    
    # Pobierz wymiary obrazu
    h, w, _ = image_upscaled.shape
    
    # Wyodrębnienie regionu w lewym górnym rogu (przyjmujemy 20% szerokości i wysokości)
    roi_top_left = image_upscaled[0:int(h*0.5), 0:int(w*0.3)]
    
    # Wyodrębnienie regionu w prawym dolnym rogu (przyjmujemy 20% szerokości i wysokości)
    roi_bottom_right = image_upscaled[int(h*0):h, int(w*0.0):w]
    
    # Przetwarzanie obrazu przed OCR
    processed_top_left = roi_top_left
    processed_bottom_right = roi_bottom_right
    
    # Zastosowanie OCR do regionu lewego górnego
    number_top_left = pytesseract.image_to_string(processed_top_left, config='--psm 6 --oem 1').strip()
    
    # Zastosowanie OCR do regionu prawego dolnego
    number_bottom_right = pytesseract.image_to_string(processed_bottom_right, config='--psm 3 --oem 1').strip()
    
    return number_top_left, number_bottom_right

# Ścieżka do obrazu
image_path = r'image/div/c8.jpg'

# Wyodrębnij liczby
number_top_left, number_bottom_right = extract_numbers(image_path)

# Wyświetlenie wyników
print(f'Liczba w lewym górnym rogu: {number_top_left}')
print(f'Liczba w prawym dolnym rogu: {number_bottom_right}')


Liczba w lewym górnym rogu: 
Liczba w prawym dolnym rogu: OS 146 600,00


In [127]:
!pip install surya-ocr

Collecting surya-ocr
  Downloading surya_ocr-0.5.0-py3-none-any.whl.metadata (26 kB)
Collecting filetype<2.0.0,>=1.2.0 (from surya-ocr)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting ftfy<7.0.0,>=6.1.3 (from surya-ocr)
  Downloading ftfy-6.2.3-py3-none-any.whl.metadata (7.8 kB)
Collecting pydantic-settings<3.0.0,>=2.1.0 (from surya-ocr)
  Downloading pydantic_settings-2.4.0-py3-none-any.whl.metadata (3.5 kB)
Collecting tabulate<0.10.0,>=0.9.0 (from surya-ocr)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading surya_ocr-0.5.0-py3-none-any.whl (103 kB)
   ---------------------------------------- 0.0/103.6 kB ? eta -:--:--
   --------------------------- ------------ 71.7/103.6 kB 3.8 MB/s eta 0:00:01
   ---------------------------------------- 103.6/103.6 kB 1.5 MB/s eta 0:00:00
Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Downloading ftfy-6.2.3-py3-none-any.whl (43 kB)
   ---------------------------------------- 0.0/43.0 


[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
from surya_functions import get_image_text 

ModuleNotFoundError: No module named 'surya_functions'

In [2]:
!pip install keras-ocr -q


[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import keras_ocr
image_path = r'image/div/c8.jpg'
recognizer = keras_ocr.recognition.Recognizer(alphabet="0123456789.,")
pipeline = keras_ocr.pipeline.Pipeline(recognizer=recognizer)
extract_info = pipeline.recognize([image_path])
print(extract_info[0][0])

Provided alphabet does not match pretrained alphabet. Using backbone weights only.
Looking for C:\Users\G\.keras-ocr\crnn_kurapan_notop.h5
Looking for C:\Users\G\.keras-ocr\craft_mlt_25k.h5
('.821,1,12146', array([[ 5.,  0.],
       [20.,  0.],
       [20., 10.],
       [ 5., 10.]], dtype=float32))


In [8]:
print(extract_info)

[[('.821,1,12146', array([[ 5.,  0.],
       [20.,  0.],
       [20., 10.],
       [ 5., 10.]], dtype=float32)), ('4141,1,72721246', array([[ 45.,   5.],
       [115.,   5.],
       [115.,  21.],
       [ 45.,  21.]], dtype=float32))]]


In [4]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.17.0-cp311-cp311-win_amd64.whl.metadata (3.2 kB)
Collecting tensorflow-intel==2.17.0 (from tensorflow)
  Downloading tensorflow_intel-2.17.0-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.17.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.17.0->tensorflow)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.17.0->tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.17.0->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensor


[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
!pip install tensorflow==2.15

Collecting tensorflow==2.15
  Downloading tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata (3.6 kB)
Collecting tensorflow-intel==2.15.0 (from tensorflow==2.15)
  Downloading tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata (5.1 kB)
Collecting ml-dtypes~=0.2.0 (from tensorflow-intel==2.15.0->tensorflow==2.15)
  Downloading ml_dtypes-0.2.0-cp311-cp311-win_amd64.whl.metadata (20 kB)
Collecting wrapt<1.15,>=1.11.0 (from tensorflow-intel==2.15.0->tensorflow==2.15)
  Downloading wrapt-1.14.1-cp311-cp311-win_amd64.whl.metadata (6.9 kB)
Collecting tensorboard<2.16,>=2.15 (from tensorflow-intel==2.15.0->tensorflow==2.15)
  Downloading tensorboard-2.15.2-py3-none-any.whl.metadata (1.7 kB)
Collecting tensorflow-estimator<2.16,>=2.15.0 (from tensorflow-intel==2.15.0->tensorflow==2.15)
  Downloading tensorflow_estimator-2.15.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting keras<2.16,>=2.15.0 (from tensorflow-intel==2.15.0->tensorflow==2.15)
  Downloading keras-2.15.0-py3-none-any.

  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.

[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
!pip install paddleocr

Collecting paddleocr
  Downloading paddleocr-2.8.1-py3-none-any.whl.metadata (19 kB)
Collecting lmdb (from paddleocr)
  Downloading lmdb-1.5.1-cp311-cp311-win_amd64.whl.metadata (1.1 kB)
Collecting rapidfuzz (from paddleocr)
  Downloading rapidfuzz-3.9.7-cp311-cp311-win_amd64.whl.metadata (12 kB)
Collecting opencv-contrib-python (from paddleocr)
  Downloading opencv_contrib_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting cython (from paddleocr)
  Downloading Cython-3.0.11-cp311-cp311-win_amd64.whl.metadata (3.2 kB)
Collecting python-docx (from paddleocr)
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting fire>=0.3.0 (from paddleocr)
  Downloading fire-0.6.0.tar.gz (88 kB)
     ---------------------------------------- 0.0/88.4 kB ? eta -:--:--
     --------------------------- ------------ 61.4/88.4 kB 3.2 MB/s eta 0:00:01
     --------------------------- ------------ 61.4/88.4 kB 3.2 MB/s eta 0:00:01
     --------------------------- ------

ERROR: Could not install packages due to an OSError: [WinError 5] Odmowa dostępu: 'C:\\Users\\G\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\cv2\\cv2.pyd'
Consider using the `--user` option or check the permissions.


[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from paddleocr import PaddleOCR
ocr = PaddleOCR(lang='en')
img_path = r'image/div/c8.jpg'
result = ocr.ocr(img_path,det = False,cls=False)
for idx in range(len(result)):
    res = result[idx]
    for line in res:
        print(line)

[2024/09/04 13:02:53] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\G/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\G/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_t

In [5]:
from paddleocr import PaddleOCR
from PIL import Image

ocr = PaddleOCR()

cropped_img_path = r'image/div/c8.jpg'
cropped_img = Image.open(cropped_img_path)

result = ocr.ocr_for_single_line(cropped_img)

#recognized_text = result[0][0]

print('Recognized Text:', result)

[2024/09/04 13:21:29] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\G/.paddleocr/whl\\det\\ch\\ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\G/.paddleocr/whl\\rec\\ch\\ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_t

AttributeError: 'PaddleOCR' object has no attribute 'ocr_for_single_line'

In [2]:
from paddleocr import PaddleOCR, draw_ocr

# Zainicjalizuj obiekt OCR. Użyj GPU, jeśli jest dostępne, dla szybszego działania
ocr = PaddleOCR(use_angle_cls=True, lang='en') 

# Podaj ścieżkę do swojego obrazka JPG
img_path = r'image/div/c8.jpg'

# Wykonaj OCR na obrazku
result = ocr.ocr(img_path, cls=True)

# Wyświetl wyodrębniony tekst
for idx in range(len(result)):
    res = result[idx]
    for line in res:
        print(line)

# Opcjonalnie: Narysuj wykryte pola tekstowe na obrazku i zapisz wynik
from PIL import Image

image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]




[2024/09/04 16:07:33] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\G/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\G/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_t

In [None]:
ocr.ocr(img_path, cls=True)

In [None]:
from PIL import Image
from surya.ocr import run_ocr
from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.recognition.processor import load_processor as load_rec_processor

IMAGE_PATH = r'image/div/c8.jpg'
image = Image.open(IMAGE_PATH)
langs = ["en"] # Replace with your languages - optional but recommended
det_processor, det_model = load_det_processor(), load_det_model()
rec_model, rec_processor = load_rec_model(), load_rec_processor()

predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)

In [2]:
predictions

[OCRResult(text_lines=[TextLine(polygon=[[0.0, 0.0], [23.0, 0.0], [23.0, 9.0], [0.0, 9.0]], confidence=0.7424973845481873, text=' 80.', bbox=[0.0, 0.0, 23.0, 9.0]), TextLine(polygon=[[42.0, 5.0], [116.0, 5.0], [116.0, 21.0], [42.0, 21.0]], confidence=0.9186222553253174, text='146 600,00', bbox=[42.0, 5.0, 116.0, 21.0])], languages=['en'], image_bbox=[0.0, 0.0, 117.0, 28.0])]

In [3]:
predictions2 = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)

Detecting bboxes: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:09<00:00,  9.72s/it]
Recognizing Text: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:10<00:00, 10.59s/it]


In [4]:
predictions2

[OCRResult(text_lines=[TextLine(polygon=[[0.0, 0.0], [23.0, 0.0], [23.0, 9.0], [0.0, 9.0]], confidence=0.7424973845481873, text=' 80.', bbox=[0.0, 0.0, 23.0, 9.0]), TextLine(polygon=[[42.0, 5.0], [116.0, 5.0], [116.0, 21.0], [42.0, 21.0]], confidence=0.9186222553253174, text='146 600,00', bbox=[42.0, 5.0, 116.0, 21.0])], languages=['en'], image_bbox=[0.0, 0.0, 117.0, 28.0])]

In [2]:
predictions2 = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)

NameError: name 'run_ocr' is not defined

In [6]:
import cv2
import pytesseract
from PIL import Image
import numpy as np

# Opcjonalnie ustaw ścieżkę do Tesseract, jeśli nie jest w PATH
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def preprocess_image(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.medianBlur(gray, 3)
    gray = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11, 2
    )
    scale_percent = 200
    width = int(gray.shape[1] * scale_percent / 100)
    height = int(gray.shape[0] * scale_percent / 100)
    dim = (width, height)
    gray = cv2.resize(gray, dim, interpolation=cv2.INTER_LINEAR)
    return gray

def extract_digits(image):
    custom_config = r'-c tessedit_char_whitelist=,.0123456789,. --oem 3 --psm 6'
    text = pytesseract.image_to_string(image, config=custom_config)
    digits = ''.join(filter(str.isdigit, text))
    return text

if __name__ == "__main__":
    image_path = r'image/div/c8t.jpg'
    processed_image = preprocess_image(image_path)
    cv2.imwrite('przetworzony_cyfry.jpg', processed_image)
    digits = extract_digits(processed_image)
    print(f"Rozpoznane cyfry: {digits}")

Rozpoznane cyfry: 


In [19]:
import cv2
import pytesseract
import numpy as np

# Ścieżka do Tesseract OCR, jeśli nie jest dodana do PATH
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
def increase_contrast(image):
    # Zastosowanie równoważenia histogramu
    return cv2.equalizeHist(image)

def reduce_noise(image):
    # Filtr medianowy
    return cv2.medianBlur(image, 3)

def morphological_operations(image):
    kernel = np.ones((2,2), np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
    
def preprocess_image(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    scale_factor = 10
    width = img.shape[1] * scale_factor
    height = img.shape[0] * scale_factor
    dim = (width, height)
    resized = cv2.resize(img, dim, interpolation=cv2.INTER_NEAREST)
    
    # Zwiększenie kontrastu
    contrast = increase_contrast(resized)
    
    # Redukcja szumów
    denoised = reduce_noise(contrast)
    
    # Operacje morfologiczne
    morph = morphological_operations(denoised)
    
    # Binarizacja
    _, binary = cv2.threshold(morph, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    
    return binary


def extract_digits(image):
    # Konfiguracja Tesseract dla cyfr
    custom_config = r'-c tessedit_char_whitelist=,.0123456789 --oem 3 --psm 10'

    # Wykonaj OCR
    text = pytesseract.image_to_string(image, config=custom_config)
    
    # Oczyszczenie wyniku
    digits = ''.join(filter(str.isdigit, text))
    return digits

if __name__ == "__main__":
    image_path =  r'image/div/c32.jpg'  # Podaj ścieżkę do obrazu
    processed_image = preprocess_image(image_path)
    
    # Opcjonalnie zapisz przetworzony obraz do weryfikacji
    cv2.imwrite('przetworzony_cyfry.png', processed_image)
    
    digits = extract_digits(processed_image)
    print(f"Rozpoznane cyfry: {digits}")

Rozpoznane cyfry: 


In [26]:
import cv2
import pytesseract

# Wczytaj obraz
image = cv2.imread(r'image/div/c8.jpg')

# Konwertuj do skali szarości
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Usuń szumy i zwiększ kontrast
gray = cv2.medianBlur(gray, 3)
gray = cv2.equalizeHist(gray)

# Binarizacja
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Morfologia
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# Skalowanie
scale_percent = 100
width = int(thresh.shape[1] * scale_percent / 100)
height = int(thresh.shape[0] * scale_percent / 100)
dim = (width, height)
thresh = cv2.resize(thresh, dim, interpolation=cv2.INTER_CUBIC)

# Odczyt tekstu
custom_config = r'--oem 3 --psm 6'
text = pytesseract.image_to_string(thresh, config=custom_config)

print("Odczytane liczby:", text)

Odczytane liczby: 


In [27]:
import pytesseract
from PIL import Image

# Ścieżka do pliku
image_path = r"image/div/c32.jpg"

# Otwieramy obraz
image = Image.open(image_path)

# Odczytujemy tekst z obrazu
text = pytesseract.image_to_string(image)

# Wyświetlamy odczytany tekst
print("Odczytany tekst:", text)

Odczytany tekst: 


In [28]:
import pytesseract
from PIL import Image, ImageEnhance, ImageFilter

# Otwieramy obraz
image_path =  r"image/div/c32.jpg"
image = Image.open(image_path)

# Konwertujemy obraz na skalę szarości
gray_image = image.convert('L')

# Zwiększamy kontrast obrazu
enhancer = ImageEnhance.Contrast(gray_image)
enhanced_image = enhancer.enhance(2)

# Wyostrzamy obraz
sharpened_image = enhanced_image.filter(ImageFilter.SHARPEN)

# Odczytujemy tekst z obrazu
text = pytesseract.image_to_string(sharpened_image, config='--psm 6')

# Wyświetlamy odczytany tekst
print("Odczytany tekst:", text)

Odczytany tekst: fe



In [38]:
import pytesseract
from PIL import Image, ImageEnhance, ImageFilter

# Otwieramy obraz
image_path = r"image/div/c8t.jpg"
image = Image.open(image_path)

# Konwertujemy obraz na skalę szarości
gray_image = image.convert('L')

# Zwiększamy rozmiar obrazu dla lepszej rozdzielczości
resized_image = gray_image.resize((gray_image.width * 2, gray_image.height * 2))#, Image.ANTIALIAS)

# Zwiększamy kontrast obrazu
enhancer = ImageEnhance.Contrast(resized_image)
enhanced_image = enhancer.enhance(2)

# Usuwamy szumy (threshold)
threshold_image = enhanced_image.point(lambda p: p > 128 and 255)

# Wyostrzamy obraz
sharpened_image = threshold_image.filter(ImageFilter.SHARPEN)

# Odczytujemy tekst z obrazu, stosując inny tryb 'psm'
text = pytesseract.image_to_string(sharpened_image, config='--psm 10')  # ps7 to tryb: single line

# Wyświetlamy odczytany tekst
print("Odczytany tekst:", text)

Odczytany tekst: 


In [45]:
import easyocr
from PIL import Image

# Otwieramy obraz
image_path = r"image/div/c8.jpg"
image = Image.open(image_path)

# Konwertujemy obraz na skalę szarości, jeśli jest taka potrzeba
gray_image = image.convert('L')

# Zapisujemy obraz tymczasowo, aby móc go przetworzyć za pomocą EasyOCR
temp_image_path = r"image/div/c8tx.jpg"
gray_image.save(temp_image_path)

# Tworzymy instancję EasyOCR
reader = easyocr.Reader(['en'])  # Zainicjujemy model tylko dla angielskiego (można dodać inne języki)

# Odczytujemy tekst z obrazu
result = reader.readtext(temp_image_path)

# Wyświetlamy wyniki
for (bbox, text, prob) in result:
    print(f"Odczytany tekst: {text} (Prawdopodobieństwo: {prob})")

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Odczytany tekst: 146 600,00 (Prawdopodobieństwo: 0.5629606316038704)


In [50]:
import cv2
import pytesseract
from PIL import Image
import numpy as np

# Ścieżka do Tesseract (jeśli potrzebne)
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Ładowanie obrazu
image = cv2.imread(r"image/div/c32.jpg")

# Konwersja do skali szarości
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Usuwanie szumów
gray = cv2.medianBlur(gray, 3)

# Skalowanie obrazu
scale_percent = 200
width = int(gray.shape[1] * scale_percent / 100)
height = int(gray.shape[0] * scale_percent / 100)
dim = (width, height)
gray = cv2.resize(gray, dim, interpolation=cv2.INTER_CUBIC)

# Binarizacja
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Opcjonalne przetwarzanie morfologiczne
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# Konfiguracja Tesseract
custom_config = r'--oem 3 --psm 6 outputbase digits'

# Odczyt tekstu
text = pytesseract.image_to_string(thresh, config=custom_config)
print(text)




In [None]:
import cv2
import pytesseract
from PIL import Image
import numpy as np
from skimage.restoration import denoise_tv_chambolle
import easyocr

# Konfiguracja Tesseract (jeśli potrzebne)
# Dla Windows:
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def preprocess_image(image_path):
    # Wczytaj obraz
    image = cv2.imread(image_path)
    
    # Sprawdź, czy obraz został wczytany
    if image is None:
        print("Nie można wczytać obrazu. Sprawdź ścieżkę do pliku.")
        return None
    
    # Konwersja do skali szarości
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Usuwanie szumów
    gray = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
    
    # Wyrównanie histogramu
    gray = cv2.equalizeHist(gray)
    
    # Wyostrzanie obrazu
    kernel_sharpening = np.array([[-1,-1,-1],
                                  [-1, 9,-1],
                                  [-1,-1,-1]])
    sharpened = cv2.filter2D(gray, -1, kernel_sharpening)
    
    # Skalowanie obrazu
    scale_percent = 300  # Zwiększenie rozmiaru o 300%
    width = int(sharpened.shape[1] * scale_percent / 100)
    height = int(sharpened.shape[0] * scale_percent / 100)
    dim = (width, height)
    resized = cv2.resize(sharpened, dim, interpolation=cv2.INTER_CUBIC)
    
    # Binarizacja adaptacyjna
    thresh = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 15, 3)
    
    # Operacje morfologiczne
    kernel = np.ones((2, 2), np.uint8)
    processed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    return processed

def read_text_pytesseract(processed_image):
    # Konfiguracja Tesseract
    custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789'
    
    # Odczyt tekstu
    text = pytesseract.image_to_string(processed_image, config=custom_config)
    
    return text

def read_text_easyocr(image_path):
    # Inicjalizacja EasyOCR
    reader = easyocr.Reader(['en'], gpu=False)
    
    # Odczyt tekstu
    result = reader.readtext(image_path, detail=0, allowlist='0123456789')
    
    # Łączenie wyników w jeden ciąg
    text = ''.join(result)
    
    return text

def main():
    image_path = r"image/div/c8t.jpg"  # Upewnij się, że podajesz poprawną ścieżkę
    
    # Przetwarzanie obrazu
    processed_image = preprocess_image(image_path)
    
    if processed_image is None:
        return
    
    # Odczyt tekstu za pomocą Pytesseract
    text_pytesseract = read_text_pytesseract(processed_image)
    print("Odczytany tekst (Pytesseract):")
    print(text_pytesseract.strip())
    
    # Odczyt tekstu za pomocą EasyOCR
    text_easyocr = read_text_easyocr(image_path)
    print("Odczytany tekst (EasyOCR):")
    print(text_easyocr.strip())
    
    # Wyświetlenie przetworzonego obrazu (opcjonalnie)
    cv2.imshow('Przetworzony obraz', processed_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

Using CPU. Note: This module is much faster with a GPU.


Odczytany tekst (Pytesseract):

Odczytany tekst (EasyOCR):



In [5]:
from mmocr.utils.ocr import MMOCR
ocr = MMOCR(det='TextSnake', recog=None)
ocr(r"image/div/c8.jpg", show=True, print_result=True)

ModuleNotFoundError: No module named 'mmocr.utils.ocr'

In [1]:
!pip install torch==2.0.0 torchvision==0.15.1
!pip install -U openmim
!mim install "mmengine>=0.7.1,<1.1.0"
!mim install "mmcv>=2.0.0rc4,<2.1.0"
!mim install "mmdet>=3.0.0rc5,<3.2.0"
!mim install mmocr

Collecting torch==2.0.0
  Downloading torch-2.0.0-cp311-cp311-win_amd64.whl.metadata (24 kB)
Collecting torchvision==0.15.1
  Downloading torchvision-0.15.1-cp311-cp311-win_amd64.whl.metadata (11 kB)
Downloading torch-2.0.0-cp311-cp311-win_amd64.whl (172.3 MB)
   ---------------------------------------- 0.0/172.3 MB ? eta -:--:--
   ---------------------------------------- 0.1/172.3 MB 1.9 MB/s eta 0:01:30
   ---------------------------------------- 0.1/172.3 MB 2.1 MB/s eta 0:01:22
   ---------------------------------------- 0.2/172.3 MB 1.9 MB/s eta 0:01:33
   ---------------------------------------- 0.3/172.3 MB 2.0 MB/s eta 0:01:28
   ---------------------------------------- 0.3/172.3 MB 2.0 MB/s eta 0:01:28
   ---------------------------------------- 0.4/172.3 MB 1.9 MB/s eta 0:01:31
   ---------------------------------------- 0.4/172.3 MB 1.7 MB/s eta 0:01:39
   ---------------------------------------- 0.5/172.3 MB 1.8 MB/s eta 0:01:38
   ---------------------------------------- 

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
surya-ocr 0.5.0 requires torch<3.0.0,>=2.3.0, but you have torch 2.0.0 which is incompatible.
torchaudio 2.4.0+cu118 requires torch==2.4.0+cu118, but you have torch 2.0.0 which is incompatible.

[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting openmim
  Downloading openmim-0.3.9-py2.py3-none-any.whl.metadata (16 kB)
Collecting model-index (from openmim)
  Downloading model_index-0.1.11-py3-none-any.whl.metadata (3.9 kB)
Collecting opendatalab (from openmim)
  Downloading opendatalab-0.0.10-py3-none-any.whl.metadata (6.4 kB)
Collecting ordered-set (from model-index->openmim)
  Downloading ordered_set-4.1.0-py3-none-any.whl.metadata (5.3 kB)
Collecting openxlab (from opendatalab->openmim)
  Downloading openxlab-0.1.1-py3-none-any.whl.metadata (3.8 kB)
Collecting filelock~=3.14.0 (from openxlab->opendatalab->openmim)
  Downloading filelock-3.14.0-py3-none-any.whl.metadata (2.8 kB)
Collecting oss2~=2.17.0 (from openxlab->opendatalab->openmim)
  Downloading oss2-2.17.0.tar.gz (259 kB)
     ---------------------------------------- 0.0/259.5 kB ? eta -:--:--
     ---- ---------------------------------- 30.7/259.5 kB 1.3 MB/s eta 0:00:01
     ----------------- -------------------- 122.9/259.5 kB 1.4 MB/s eta 0:00:01
     

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
chatgptautomation 0.7.3 requires requests==2.31.0, but you have requests 2.28.2 which is incompatible.
jupyterlab-server 2.25.2 requires requests>=2.31, but you have requests 2.28.2 which is incompatible.
qianfan 0.3.1 requires python-dotenv<=0.21.1, but you have python-dotenv 1.0.0 which is incompatible.
tensorflow-intel 2.15.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.
torchaudio 2.4.0+cu118 requires torch==2.4.0+cu118, but you have torch 2.0.0 which is incompatible.
yfinance 0.2.38 requires requests>=2.31, but you have requests 2.28.2 which is incompatible.

[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Looking in links: https://download.openmmlab.com/mmcv/dist/cpu/torch2.0.0/index.html



[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Looking in links: https://download.openmmlab.com/mmcv/dist/cpu/torch2.0.0/index.html
Collecting mmcv<2.1.0,>=2.0.0rc4
  Downloading https://download.openmmlab.com/mmcv/dist/cpu/torch2.0.0/mmcv-2.0.1-cp311-cp311-win_amd64.whl (982 kB)
     ---------------------------------------- 0.0/982.9 kB ? eta -:--:--
      ------------------------------------ 20.5/982.9 kB 330.3 kB/s eta 0:00:03
     - ----------------------------------- 30.7/982.9 kB 660.6 kB/s eta 0:00:02
     - ----------------------------------- 30.7/982.9 kB 660.6 kB/s eta 0:00:02
     - ----------------------------------- 30.7/982.9 kB 660.6 kB/s eta 0:00:02
     --- --------------------------------- 81.9/982.9 kB 416.7 kB/s eta 0:00:03
     --- --------------------------------- 81.9/982.9 kB 416.7 kB/s eta 0:00:03
     --- --------------------------------- 81.9/982.9 kB 416.7 kB/s eta 0:00:03
     --- --------------------------------- 81.9/982.9 kB 416.7 kB/s eta 0:00:03
     --- --------------------------------- 81.9/982.9

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mmocr 1.0.1 requires mmdet<3.2.0,>=3.0.0rc5; extra == "mim", but you have mmdet 3.3.0 which is incompatible.

[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Looking in links: https://download.openmmlab.com/mmcv/dist/cpu/torch2.0.0/index.html
Collecting mmdet<3.2.0,>=3.0.0rc5
  Downloading mmdet-3.1.0-py3-none-any.whl.metadata (28 kB)
Ignoring mmcv: markers 'extra == "mim"' don't match your environment
Ignoring mmengine: markers 'extra == "mim"' don't match your environment
Downloading mmdet-3.1.0-py3-none-any.whl (2.0 MB)
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   - -------------------------------------- 0.1/2.0 MB 2.6 MB/s eta 0:00:01
   ---- ----------------------------------- 0.2/2.0 MB 3.1 MB/s eta 0:00:01
   --------- ------------------------------ 0.5/2.0 MB 4.2 MB/s eta 0:00:01
   --------------- ------------------------ 0.8/2.0 MB 5.3 MB/s eta 0:00:01
   ---------------------- ----------------- 1.1/2.0 MB 6.5 MB/s eta 0:00:01
   ------------------------------- -------- 1.6/2.0 MB 7.1 MB/s eta 0:00:01
   ---------------------------------------  2.0/2.0 MB 7.9 MB/s eta 0:00:01
   -------------------------


[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Looking in links: https://download.openmmlab.com/mmcv/dist/cpu/torch2.0.0/index.html
Ignoring mmcv: markers 'extra == "mim"' don't match your environment
Ignoring mmdet: markers 'extra == "mim"' don't match your environment
Ignoring mmengine: markers 'extra == "mim"' don't match your environment



[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [23]:
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import requests
from PIL import Image

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")

# load image from the IAM dataset
url = r"image/div/c8t.jpg"
image = Image.open(r"image/div/c8t.jpg").convert("RGB")

pixel_values = processor(image, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values)

generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-printed and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [24]:
generated_text

'</s>90.</s>'

In [25]:
url = r"image/div/c7.jpg"
image = Image.open(r"image/div/c7.jpg")#.convert("RGB")

pixel_values = processor(image, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values)

generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
generated_text

'</s>81. 44168,72</s>'