-
-
Notifications
You must be signed in to change notification settings - Fork 3.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fb2 to html5 loses images after conversion #5847
Comments
Have you tried with |
# No, I didn't know about that parameter. With even Is there any way to
# convert and put base64 into For part I have some example from other
# language. This is python language and it determine a lot types of media,
# however if you remove and leave only to, probably, Built-in libraries from
# code example:
def get_image_metadata(file_path: str):
    """Sniff a media file's header and report its kind, format and size.

    Only the first 32 bytes are read up front; the JPEG, TIFF and ICO
    branches seek further into the file as needed.

    The format identifiers ``GIF``, ``PNG``, ``JPEG``, ``BMP``, ``TIFF``,
    ``MP4``, ``MKV``, ``WEBM``, ``WEBP`` and ``ICO`` are not defined in this
    snippet; they are expected to exist at module level.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A dict with the keys ``path``, ``type`` (``"img"`` or ``"video"``),
        ``format``, ``file_size``, ``width`` and ``height``. Width and height
        stay ``-1`` when they are not extracted (video containers, TIFF files
        without size tags). ``None`` is returned when the file is not
        recognised or its header is truncated/malformed.

    Raises:
        OSError: If the file cannot be stat'ed or opened.
    """
    png_signature = b"\x89PNG\r\n\x1a\n"
    size = os.path.getsize(file_path)
    width, height = -1, -1

    with open(file_path, "rb") as stream:
        data = stream.read(32)

        if size >= 10 and data[:6] in (b"GIF87a", b"GIF89a"):
            # GIF: logical screen size follows the 6-byte signature.
            media_type, media_format = "img", GIF
            width, height = struct.unpack("<HH", data[6:10])
        elif (size >= 24 and data.startswith(png_signature)
              and data[12:16] == b"IHDR"):
            # PNG with a leading IHDR chunk.
            media_type, media_format = "img", PNG
            width, height = struct.unpack(">LL", data[16:24])
        elif size >= 16 and data.startswith(png_signature):
            # Older PNG layout without the IHDR chunk header.
            media_type, media_format = "img", PNG
            width, height = struct.unpack(">LL", data[8:16])
        elif size >= 2 and data.startswith(b"\xff\xd8"):
            # JPEG: the size lives in a start-of-frame segment further in.
            media_type, media_format = "img", JPEG
            dimensions = _read_jpeg_size(stream)
            if dimensions is None:
                return None
            width, height = dimensions
        elif size >= 26 and data.startswith(b"BM"):
            # BMP: layout depends on the DIB header size.
            media_type, media_format = "img", BMP
            header_size = struct.unpack("<I", data[14:18])[0]
            if header_size == 12:
                width, height = struct.unpack("<HH", data[18:22])
            elif header_size >= 40:
                width, height = struct.unpack("<ii", data[18:26])
                # Height is negative when rows are stored top-down.
                height = abs(height)
            else:
                return None  # unknown DIB header size
        elif size >= 8 and data[:4] in (b"II*\x00", b"MM\x00*"):
            # Standard TIFF, little- or big-endian. BigTIFF and other
            # TIFF-like formats are not supported.
            media_type, media_format = "img", TIFF
            dimensions = _read_tiff_size(stream, data)
            if dimensions is None:
                return None
            width, height = dimensions
        elif data[4:8] == b"ftyp" and data[8:12] in (
                b"isom", b"mp42", b"mp41", b"qt  "):
            # MP4 / QuickTime. The brand is always four bytes, so the
            # QuickTime brand is "qt" padded with two spaces.
            media_type, media_format = "video", MP4
        elif data[24:32] == b"matroska":
            media_type, media_format = "video", MKV
        elif data[26:30] == b"webm":
            media_type, media_format = "video", WEBM
        elif data[0:4] == b"RIFF" and data[8:12] == b"WEBP":
            # WEBP: https://developers.google.com/speed/webp/docs/riff_container
            media_type, media_format = "img", WEBP
            chunk_id = data[12:16]
            if chunk_id == b"VP8X":
                # Canvas size is stored minus one in two 24-bit fields.
                width = int.from_bytes(data[24:27], byteorder="little") + 1
                height = int.from_bytes(data[27:30], byteorder="little") + 1
            elif chunk_id == b"ANMF":
                width = int.from_bytes(data[18:21], byteorder="little") + 1
                height = int.from_bytes(data[21:24], byteorder="little") + 1
            else:
                return None
        elif size >= 2:
            # Fallback: try ICO, which has no magic string.
            # See http://en.wikipedia.org/wiki/ICO_(file_format)
            media_type, media_format = "img", ICO
            stream.seek(0)
            ico_header = stream.read(8)
            if len(ico_header) < 8:
                return None  # too short to hold ICONDIR + first size bytes
            reserved, resource_type, image_count, raw_width, raw_height = (
                struct.unpack("<HHHBB", ico_header))
            if reserved != 0 or resource_type != 1:
                return None
            if image_count > 1:
                import warnings
                warnings.warn("ICO File contains more than one image")
            # http://msdn.microsoft.com/en-us/library/ms997538.aspx
            # A size byte of 0 encodes 256 pixels.
            width = raw_width or 256
            height = raw_height or 256
        else:
            return None

    return {
        "path": file_path,
        "type": media_type,
        "format": media_format,
        "file_size": size,
        "width": int(width),
        "height": int(height),
    }


def _read_jpeg_size(stream):
    """Return ``(width, height)`` from the first SOF0-SOF3 segment, or None."""
    stream.seek(2)  # skip the SOI marker
    marker = stream.read(1)
    try:
        # 0xDA is start-of-scan: no frame header can follow it.
        while marker and ord(marker) != 0xDA:
            while ord(marker) != 0xFF:
                marker = stream.read(1)
            while ord(marker) == 0xFF:
                marker = stream.read(1)
            if 0xC0 <= ord(marker) <= 0xC3:
                stream.read(3)  # segment length (2) + sample precision (1)
                frame_height, frame_width = struct.unpack(
                    ">HH", stream.read(4))
                return int(frame_width), int(frame_height)
            segment_length = struct.unpack(">H", stream.read(2))[0]
            stream.read(segment_length - 2)
            marker = stream.read(1)
    except (struct.error, TypeError, ValueError):
        # Truncated data: ord(b"") raises TypeError, short reads raise
        # struct.error, a bogus segment length raises ValueError.
        return None
    return None


def _read_tiff_size(stream, header):
    """Return ``(width, height)`` from the first IFD of a TIFF, or None.

    Either value is ``-1`` when the matching tag (256 = ImageWidth,
    257 = ImageLength) is absent from the directory.
    """
    # The header bytes are ``bytes``, so compare against a bytes literal.
    order = ">" if header[:2] == b"MM" else "<"
    # TIFF field type id -> (size in bytes, struct format).
    field_formats = {
        1: (1, order + "B"),    # BYTE
        2: (1, order + "c"),    # ASCII
        3: (2, order + "H"),    # SHORT
        4: (4, order + "L"),    # LONG
        5: (8, order + "LL"),   # RATIONAL
        6: (1, order + "b"),    # SBYTE
        7: (1, order + "c"),    # UNDEFINED
        8: (2, order + "h"),    # SSHORT
        9: (4, order + "l"),    # SLONG
        10: (8, order + "ll"),  # SRATIONAL
        11: (4, order + "f"),   # FLOAT
        12: (8, order + "d"),   # DOUBLE
    }
    count_size = 2
    # 2 bytes tag id + 2 bytes type + 4 bytes value count + 4 bytes value
    entry_size = 12
    width, height = -1, -1
    ifd_offset = struct.unpack(order + "L", header[4:8])[0]
    try:
        stream.seek(ifd_offset)
        entry_count = struct.unpack(order + "H", stream.read(count_size))[0]
        for index in range(entry_count):
            entry_offset = ifd_offset + count_size + index * entry_size
            stream.seek(entry_offset)
            tag = struct.unpack(order + "H", stream.read(2))[0]
            if tag not in (256, 257):
                continue
            field_type = struct.unpack(order + "H", stream.read(2))[0]
            if field_type not in field_formats:
                return None  # unknown TIFF field type
            field_size, field_format = field_formats[field_type]
            # A value that fits in four bytes is stored inline in the entry
            # instead of behind an offset.
            stream.seek(entry_offset + 8)
            value = int(struct.unpack(
                field_format, stream.read(field_size))[0])
            if tag == 256:
                width = value
            else:
                height = value
            if width > -1 and height > -1:
                break
    except (struct.error, OSError, ValueError, OverflowError):
        return None
    return width, height
Also, this option can be useful to embed images into the result: |
pandoc 2.7.3
Compiled with pandoc-types 1.17.5.4, texmath 0.11.2.2, skylighting 0.8.1
I have an fb2 file with images and need to convert it to an html5 file so I can read it later, but the problem is that the image data is lost after conversion.
Code example:
Between the binary tags there is a lot of data (the image itself), and several images are used there.
Tags are converted into this:
But the image data is not copied.
What you can do here is paste base64 images straight into the src attribute; for example, a 1x1px transparent png:
The text was updated successfully, but these errors were encountered: