# In [None]:
import os
import fitz
import datetime
import platform
import time
from pptx import Presentation
from pptx.util import Pt,Inches
from pptx.dml.color import ColorFormat, RGBColor
from tqdm import tqdm
from PIL import Image
# from pdf2image import convert_from_path,convert_from_bytes
import tempfile
# from pdf2image.exceptions import (
#     PDFInfoNotInstalledError,
#     PDFPageCountError,
#     PDFSyntaxError
# )

class pdfToPPTX:
    """Render selected PDF pages to images and assemble them into a PPTX deck.

    Typical flow: ``pyMuPDF_fitz()`` writes one PNG per selected page into
    ``imagePath``, ``image_adj()`` optionally crops those PNGs, and ``toPPT()``
    builds ``pic.pptx`` (one numbered slide per PNG) inside ``imagePath``.
    """

    def __init__(self, pdfname, imagePath, *pageNum):
        """Store the paths (with OS-appropriate separators) and the page selection.

        Args:
            pdfname: Path of the source PDF file.
            imagePath: Directory that receives the images and the PPTX file.
            *pageNum: 1-based page selection. Each item is either a page number
                (int or numeric string) or an inclusive range string such as
                ``"1-3"``.
        """
        # Path separators differ between operating systems, so normalise them.
        thissys = platform.platform().lower()
        if 'windows' in thissys:
            print("windows 系统 \n")
            self.pdfname = pdfname.replace("/", "\\")
            self.imagePath = imagePath.replace("/", "\\")
        else:
            # Every non-Windows platform uses forward slashes. The paths are
            # also set for platforms other than Linux (e.g. macOS), so that the
            # attributes always exist.
            if 'linux' in thissys:
                print("linux 系统 \n")
            self.pdfname = pdfname.replace("\\", "/")
            self.imagePath = imagePath.replace("\\", "/")
        self.pagenum = pageNum

    def pdf2image(self):
        """Convert the selected pages to JPEG files using ``pdf2image``.

        NOTE: ``convert_from_path`` comes from the ``pdf2image`` package, whose
        import is commented out at the top of this file; re-enable that import
        before calling this method.
        """
        pgns = self.outpageNum()

        print("正在读取Pdf文件…… \n")
        images_from_path = convert_from_path(self.pdfname)
        print("文件总页数 {} 页。 \n".format(len(images_from_path)))
        print("正在转换…… \n")
        # Presumably gives the messages above time to appear before the
        # progress bar starts drawing.
        time.sleep(0.5)

        # Create the output directory once instead of checking on every page.
        os.makedirs(self.imagePath, exist_ok=True)
        for pgn in tqdm(pgns):
            target = os.path.join(
                self.imagePath, 'psReport_%s.jpg' % str(pgn + 1).zfill(3))
            images_from_path[pgn].save(target, 'jpeg')
        print("done")

    def image_adj(self):
        """Crop every PNG in ``imagePath`` in place to a width/1.77 tall strip.

        The crop box starts 15 pixels below the top edge and keeps the full
        width. The summary line reports the width of the last processed image
        (0 when the directory contains no PNG).
        """
        n = 0
        w = 0  # stays 0 when no PNG is found, so the summary can still be printed
        print("*正在调整图片大小...")
        for pic in os.listdir(self.imagePath):
            if not pic.lower().endswith(".png"):
                continue
            target = os.path.join(self.imagePath, pic)
            # Close the source file before overwriting it with the cropped copy.
            with Image.open(target) as _img:
                w, _ = _img.size
                left, top = 0, 15
                right, bottom = left + w, top + w / 1.77
                img = _img.crop((left, top, right, bottom))
                img.load()  # make sure the pixels are read while the file is open
            img.save(target)
            n += 1
        print("……完成。调整图片至{0}x{1}，共{2}张。 \n".format(w, int(w / 1.77), n))

    def outpageNum(self):
        """Translate the stored page selection into 0-based page indexes.

        Every selection item is honoured: ``"a-b"`` expands to the inclusive
        range a..b, anything else is treated as a single 1-based page number.

        Returns:
            list[int]: 0-based page indexes in the order given; empty when no
            page was selected.

        Raises:
            ValueError: If an item is not a number or a ``"first-last"`` range.
        """
        pgn = []
        for item in self.pagenum:
            if isinstance(item, str) and "-" in item:
                firstpage, _, lastpage = item.partition("-")
                # range() excludes its stop value, so int(lastpage) as the stop
                # includes the last page once shifted to 0-based indexes.
                pgn.extend(range(int(firstpage) - 1, int(lastpage)))
            else:
                pgn.append(int(item) - 1)
        return pgn

    def pyMuPDF_fitz(self):
        """Render the selected pages to PNG files with PyMuPDF (``fitz``).

        Files are written to ``imagePath`` as ``images_NNN.png`` where NNN is
        the zero-padded 1-based page number.
        """
        print("*正在从pdf生成图片")
        pgns = self.outpageNum()

        # The target directory must exist before the pixmaps are written.
        os.makedirs(self.imagePath, exist_ok=True)

        # Same zoom factor on both axes. Per the original author's notes an
        # unscaled page came out at 792x612; 1.33333333 gives 1056x816 and
        # 2 would give 1584x1224. No rotation is applied.
        zoom_x = 1.33333333
        zoom_y = 1.33333333
        mat = fitz.Matrix(zoom_x, zoom_y)

        pdfDoc = fitz.open(self.pdfname)
        n = 0
        try:
            for pg in pgns:
                page = pdfDoc[pg]
                # Prefer the current snake_case PyMuPDF API and fall back to the
                # legacy camelCase names on old installations.
                render = getattr(page, "get_pixmap", None) or page.getPixmap
                pix = render(matrix=mat, alpha=False)

                target = os.path.join(
                    self.imagePath, 'images_%s.png' % str(pg + 1).zfill(3))
                write = getattr(pix, "save", None) or pix.writePNG
                write(target)
                n += 1
        finally:
            # Always release the document handle, even if a page fails.
            pdfDoc.close()
        print("……完成。生成图片 {} 张。\n".format(n))

    @staticmethod
    def _add_numbered_slide(prs, picture_path, number):
        """Append one slide showing ``picture_path`` with a large red ``number``."""
        # Layout index 5 is "Title Only" in python-pptx's default template; the
        # title placeholder is left empty here.
        slide = prs.slides.add_slide(prs.slide_layouts[5])

        left, top, width = Inches(1), Inches(0.5), Inches(11.5)
        # Only the width is passed, so the height follows the image's aspect ratio.
        picture = slide.shapes.add_picture(picture_path, left, top, width)
        # Send the picture to the back. Index 2 is the first shape slot in
        # spTree (after nvGrpSpPr and grpSpPr); a lower index would put the
        # picture ahead of those mandatory elements.
        slide.shapes._spTree.insert(2, picture._element)

        left, top, wt, ht = Inches(10), Inches(6), Inches(1), Inches(1)
        textbox = slide.shapes.add_textbox(left, top, wt, ht)
        textbox.text = str(number)

        font = textbox.text_frame.paragraphs[0].font
        font.name = '微软雅黑'
        font.size = Pt(65)
        font.color.rgb = RGBColor(255, 0, 0)

    def toPPT(self):
        """Build ``pic.pptx`` in ``imagePath`` with one slide per PNG file.

        PNG files are taken in sorted filename order and numbered from 1.
        Nothing is written when the directory contains no PNG.
        """
        print("正在生成PPTX文件")
        # Sort the names, otherwise the slides follow the arbitrary listdir order.
        pics = sorted(
            name for name in os.listdir(self.imagePath)
            if name.lower().endswith(".png"))

        if pics:
            prs = Presentation()
            prs.slide_height = 6858000    # slide height in EMU
            prs.slide_width = 12192000    # slide width in EMU

            for n, name in enumerate(pics, start=1):
                self._add_numbered_slide(
                    prs, os.path.join(self.imagePath, name), n)

            # Save once at the end rather than rewriting the file per slide.
            prs.save(os.path.join(self.imagePath, 'pic.pptx'))
        print("……完成\n")
        
        
    
if __name__ == "__main__":
    # Input PDF / output directory pairs used on different machines.
    # Luo's laptop:
    #     pdf='/home/luo/data/大智小超/乐高/图纸/100份wedo2.0图纸.pdf'
    #     jpg='/home/luo/data/大智小超/乐高/图纸/output_pdf'
    # My laptop:
    #     pdf='/home/jack/文档/大智小超/100份wedo2.0图纸.pdf'
    #     jpg='/home/jack/文档/大智小超/output_pdf'
    # Guzi's machine (currently active):
    source_pdf = 'i:\\大智小超\\乐高\\2008山寨塔罗师.pdf'
    output_dir = 'i:\\大智小超\\乐高\\output_pdf'

    # Pages 1 through 3 are selected.
    converter = pdfToPPTX(source_pdf, output_dir, "1-3")

    # The image-generation steps are currently disabled; only the PPTX is
    # built, from whatever PNG files already exist in the output directory.
    #     converter.pdf2image()
    #     converter.pyMuPDF_fitz()
    #     converter.image_adj()
    converter.toPPT()

# In [None]:
import os
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.util import Pt,Inches

def toppt(imgpath="/home/jack/文档/大智小超/output_pdf"):
    """Build ``pic.pptx`` in ``imgpath`` with one titled, numbered slide per PNG.

    Args:
        imgpath: Directory holding the PNG files; the presentation is saved
            there as well. Defaults to the directory originally hard-coded.

    PNG files are taken in sorted filename order and numbered from 1. Nothing
    is written when the directory contains no PNG.
    """
    # Sort the names, otherwise the slides follow the arbitrary listdir order.
    pics = sorted(
        name for name in os.listdir(imgpath)
        if name.lower().endswith(".png"))

    if pics:
        prs = Presentation()
        prs.slide_height = 6858000    # slide height in EMU
        prs.slide_width = 12192000    # slide width in EMU

        for n, name in enumerate(pics, start=1):
            # Layout index 5 is "Title Only" in python-pptx's default template.
            slide = prs.slides.add_slide(prs.slide_layouts[5])

            # Placeholder 0 is the slide title.
            title = slide.shapes.placeholders[0]
            title.text = '这个是测试标题'
            title.text_frame.paragraphs[0].font.name = "微软雅黑"

            left, top, width = Inches(1), Inches(0.5), Inches(11.5)
            # Only the width is passed, so the height follows the aspect ratio.
            picture = slide.shapes.add_picture(
                os.path.join(imgpath, name), left, top, width)
            # Send the picture to the back. Index 2 is the first shape slot in
            # spTree (after nvGrpSpPr and grpSpPr); a lower index would put the
            # picture ahead of those mandatory elements.
            slide.shapes._spTree.insert(2, picture._element)

            left, top, wt, ht = Inches(1), Inches(6), Inches(1), Inches(1)
            textbox = slide.shapes.add_textbox(left, top, wt, ht)
            textbox.text = str(n)

            font = textbox.text_frame.paragraphs[0].font
            font.name = '微软雅黑'
            font.size = Pt(50)
            font.color.rgb = RGBColor(255, 0, 0)

        # Save once at the end rather than rewriting the file per slide.
        prs.save(os.path.join(imgpath, 'pic.pptx'))
    print('all done')

# Run the conversion as soon as this notebook cell is executed.
toppt()