In [60]:
import io
import os
# Tell the Google Cloud client libraries which credentials file to use.
# This must run before any client object is constructed.
# NOTE(review): "apikey.json" is a relative path, so it is resolved against the
# kernel's working directory — confirm the file lives there and is not committed.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "apikey.json"

# Vision (image/text detection), Text-to-Speech and Translate clients.
from google.cloud import vision, texttospeech, translate
# `types` provides the Image message passed to the Vision annotator.
from google.cloud.vision import types
# NOTE(review): PIL.Image does not appear to be used in the visible cells.
from PIL import Image

# OpenCV is used to grab a frame from the webcam and write it to disk.
import cv2 as cv

In [61]:
class Virtual_Eyes:
    """Camera-to-speech helper built on Google Cloud Vision, Translate and Text-to-Speech.

    Two pipelines are offered:

    * ``img_analysis``  - photo -> label detection -> Turkish translation -> speech
    * ``text_analysis`` - photo -> text detection (OCR) -> speech

    The photo is stored at ``<cwd>/imgs/takenphoto.jpg`` and the synthesized
    audio at ``<cwd>/speechs/output.mp3``.
    """

    def __init__(self):
        """Create one client for each Google Cloud service used by the pipelines."""
        self.imgAnnotator = vision.ImageAnnotatorClient()
        self.translator = translate.Client()
        self.textToSpeech = texttospeech.TextToSpeechClient()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _photo_path():
        """Return the path of the captured photo (resolved at call time)."""
        return os.getcwd() + "/imgs/takenphoto.jpg"

    @staticmethod
    def _speech_path():
        """Return the path of the synthesized audio file (resolved at call time)."""
        return os.getcwd() + "/speechs/output.mp3"

    @staticmethod
    def _extract_full_text(annotations):
        """Return the complete detected text from a sequence of text annotations.

        The first annotation of a text-detection response already contains the
        whole detected text; the following ones repeat it word by word, so
        joining all of them would duplicate every word.

        Returns ``None`` when there are no annotations or the text is empty.
        """
        for annotation in annotations:
            return annotation.description or None
        return None

    def _load_photo(self):
        """Read the captured photo from disk and wrap it in a Vision ``Image``."""
        with open(self._photo_path(), 'rb') as image_file:
            return types.Image(content=image_file.read())

    def _run_pipeline(self, analyse, no_findings_code, voicing_failed_code):
        """Shared photo -> analyse -> speak flow used by both public pipelines.

        ``analyse`` is a zero-argument callable returning the text to voice
        (or a falsy value); the two ``*_code`` arguments are the status codes
        handed to ``display_speech`` for the respective failure.
        """
        if not self.take_a_photo():
            self.display_speech("photo_could_not_taken")
            return

        findings = analyse()
        if not findings:
            self.display_speech(no_findings_code)
            return

        voiced = self.text_to_speech(findings)
        print(findings)
        self.display_speech("display_the_speech" if voiced else voicing_failed_code)

    # ------------------------------------------------------------------
    # Building blocks
    # ------------------------------------------------------------------

    def take_a_photo(self):
        """Grab one frame from camera 0 and save it as the pipeline photo.

        Returns ``True`` only if a frame was read and written to disk,
        ``False`` otherwise.
        """
        try:
            cap = cv.VideoCapture(0)  # camera with index 0
            try:
                ret, frame = cap.read()
            finally:
                # Always free the device, even when reading fails.
                cap.release()

            if not ret or frame is None:
                return False

            path = self._photo_path()
            os.makedirs(os.path.dirname(path), exist_ok=True)
            # The frame is written as captured: OpenCV delivers BGR and
            # imwrite expects BGR, so swapping channels first would store
            # the picture with red and blue exchanged.
            return bool(cv.imwrite(path, frame))
        except Exception:
            return False  # camera or file-system failure

    def make_image_analysis(self):
        """Run label detection on the photo and return the labels in Turkish.

        Returns the translated, comma-separated labels, or ``None`` when
        nothing was found or any step failed.
        """
        try:
            response = self.imgAnnotator.label_detection(image=self._load_photo())
            labels = [label.description for label in response.label_annotations]
            # Labels come back in English; translate them to Turkish.
            return self.translate_texts(labels)
        except Exception:
            return None

    def make_text_analysis(self):
        """Run text detection (OCR) on the photo and return the detected text.

        Returns ``None`` when no text was found or any step failed.
        """
        try:
            response = self.imgAnnotator.text_detection(image=self._load_photo())
            return self._extract_full_text(response.text_annotations)
        except Exception:
            return None

    def translate_texts(self, list_of_texts):
        """Translate a list of English phrases to Turkish as one sentence.

        The phrases are joined with ``', '`` and translated in a single
        request. Returns ``None`` when there is nothing to translate or the
        translation is empty. Errors raised by the client propagate.
        """
        text = ', '.join(item for item in list_of_texts if item)
        if not text:
            return None  # avoid a pointless request with an empty string

        translation = self.translator.translate(
            text, source_language='en', target_language='tr')
        return translation['translatedText'] or None

    def text_to_speech(self, text):
        """Synthesize ``text`` as Turkish speech into the output MP3 file.

        Returns ``True`` when the file was written, ``False`` otherwise.
        """
        if not text:
            return False
        try:
            synthesis_input = texttospeech.types.SynthesisInput(text=text)
            voice = texttospeech.types.VoiceSelectionParams(
                language_code='tr-TR',
                ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
            audio_config = texttospeech.types.AudioConfig(
                audio_encoding=texttospeech.enums.AudioEncoding.MP3)

            response = self.textToSpeech.synthesize_speech(
                synthesis_input, voice, audio_config)

            path = self._speech_path()
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, 'wb') as out:
                out.write(response.audio_content)
            return True
        except Exception:
            return False

    def display_speech(self, speech):
        """Play the synthesized audio when ``speech`` is ``"display_the_speech"``.

        Any other status code is accepted and ignored (no audio exists for
        them). Playback is best effort: a failure is reported on stdout and
        never raised.
        """
        if speech != "display_the_speech":
            return

        path = self._speech_path()
        try:
            if hasattr(os, "startfile"):
                os.startfile(path)  # Windows
            else:
                # os.startfile exists only on Windows; use the desktop opener.
                import subprocess
                import sys
                opener = "open" if sys.platform == "darwin" else "xdg-open"
                subprocess.Popen([opener, path])
        except Exception as exc:
            print("Could not play {}: {}".format(path, exc))

    # ------------------------------------------------------------------
    # Public pipelines
    # ------------------------------------------------------------------

    def img_analysis(self):
        """Take a photo, describe its content in Turkish, print and voice it."""
        self._run_pipeline(
            self.make_image_analysis, "there_is_no_findings", "error_of_voicing")

    def text_analysis(self):
        """Take a photo, read the text on it, print and voice it."""
        self._run_pipeline(
            self.make_text_analysis, "there_is_no_text", "voicing_error")

In [62]:
# Build the helper once; its constructor creates the Vision, Translate and
# Text-to-Speech clients that the following cells reuse.
virtual_eyes = Virtual_Eyes()

In [63]:
# Image pipeline: take a photo, detect labels, translate them to Turkish,
# synthesize speech, print the findings and play the audio.
virtual_eyes.img_analysis()

Siyah, Karanlık, Beyaz, Gökyüzü, Metin, Yeşil, Kırmızı, Işık, Kahverengi, Yazı Tipi


In [64]:
# Text pipeline: take a photo, run text detection on it, synthesize speech,
# print the detected text and play the audio.
virtual_eyes.text_analysis()